{
  "_id": "6a1f0a77b401979e7341c543",
  "Package": "cat2cat",
  "Title": "Handling an Inconsistently Coded Categorical Variable in a\nLongitudinal Dataset",
  "Version": "0.6.1.9000",
  "Authors@R": "c(\nperson(\"Maciej\", \"Nasinski\", email = \"nasinski.maciej@gmail.com\", role = c(\"aut\", \"cre\"),\ncomment = c(ORCID = \"0000-0001-5236-8040\"))\n)",
  "Maintainer": "Maciej Nasinski <nasinski.maciej@gmail.com>",
  "Description": "Unifying an inconsistently coded categorical variable\nbetween two different time points in accordance with a mapping\ntable. The main rule is to replicate the observation if it\ncould be assigned to a few categories. Then using frequencies\nor statistical methods to approximate the probabilities of\nbeing assigned to each of them. This procedure was invented and\nimplemented in the paper by 'Nasinski', 'Majchrowska', and\n'Broniatowska' (2020) <doi:10.24425/cejeme.2020.134747>.",
  "License": "GPL (>= 2) | file LICENSE",
  "URL": "https://github.com/Polkas/cat2cat,\nhttps://polkas.github.io/cat2cat/",
  "BugReports": "https://github.com/Polkas/cat2cat/issues",
  "Encoding": "UTF-8",
  "LazyData": "true",
  "VignetteBuilder": "knitr",
  "Config/testthat/edition": "3",
  "Config/roxygen2/version": "8.0.0",
  "Language": "en-US",
  "Repository": "https://polkas.r-universe.dev",
  "Date/Publication": "2026-05-17 21:43:06 UTC",
  "RemoteUrl": "https://github.com/polkas/cat2cat",
  "RemoteRef": "HEAD",
  "RemoteSha": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-17 22:01:04 UTC",
    "User": "root"
  },
  "Author": "Maciej Nasinski [aut, cre] (ORCID:\n<https://orcid.org/0000-0001-5236-8040>)",
  "MD5sum": "1626447b12d24de124ad51b46c220aa4",
  "_user": "polkas",
  "_type": "src",
  "_file": "cat2cat_0.6.1.9000.tar.gz",
  "_fileid": "92ae4510d66b6152ddede7aac3bcdd92060b48e0590c82ed58f7020c50a18bf9",
  "_filesize": 2713513,
  "_sha256": "92ae4510d66b6152ddede7aac3bcdd92060b48e0590c82ed58f7020c50a18bf9",
  "_created": "2026-05-17T22:01:04.000Z",
  "_published": "2026-06-02T16:53:11.353Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79125274788,
      "time": 282,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7046464410"
    },
    {
      "job": 79125274736,
      "time": 285,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7046464781"
    },
    {
      "job": 79125274988,
      "time": 172,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7046452154"
    },
    {
      "job": 79125274428,
      "time": 224,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7046457911"
    },
    {
      "job": 79125274111,
      "time": 259,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7046431846"
    },
    {
      "job": 79125273664,
      "time": 121,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7364315616"
    },
    {
      "job": 79125274280,
      "time": 265,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7046462512"
    },
    {
      "job": 79125274594,
      "time": 248,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7046460708"
    },
    {
      "job": 79125274546,
      "time": 250,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7046461058"
    }
  ],
  "_buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/polkas/cat2cat",
  "_commit": {
    "id": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
    "author": "Maciej Nasinski <nasinski.maciej@gmail.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "devel version",
    "time": 1779054186
  },
  "_maintainer": {
    "name": "Maciej Nasinski",
    "email": "nasinski.maciej@gmail.com",
    "login": "polkas",
    "description": "Maciej Nasinski - Data Scientist",
    "uuid": 10676545,
    "orcid": "0000-0001-5236-8040"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.6",
      "role": "Depends"
    },
    {
      "package": "MASS",
      "role": "Imports"
    },
    {
      "package": "caret",
      "role": "Suggests"
    },
    {
      "package": "dplyr",
      "role": "Suggests"
    },
    {
      "package": "e1071",
      "role": "Suggests"
    },
    {
      "package": "fixest",
      "role": "Suggests"
    },
    {
      "package": "forcats",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "magrittr",
      "role": "Suggests"
    },
    {
      "package": "randomForest",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "spelling",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "tidyr",
      "role": "Suggests"
    }
  ],
  "_owner": "polkas",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2026-19",
      "n": 2
    },
    {
      "week": "2026-20",
      "n": 2
    }
  ],
  "_tags": [
    {
      "name": "v0.6.1",
      "date": "2026-05-17"
    }
  ],
  "_topics": [
    "categories",
    "encoding",
    "encodings",
    "factor",
    "longitudinal",
    "mapping",
    "mappings",
    "panel",
    "transitions"
  ],
  "_stars": 5,
  "_contributors": [
    {
      "user": "polkas",
      "count": 120,
      "uuid": 10676545
    }
  ],
  "_userbio": {
    "uuid": 10676545,
    "type": "user",
    "name": "Maciej Nasinski",
    "description": "Maciej Nasinski - Data Scientist"
  },
  "_downloads": {
    "count": 238,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/cat2cat"
  },
  "_devurl": "https://github.com/polkas/cat2cat",
  "_pkgdown": "https://polkas.github.io/cat2cat/",
  "_searchresults": 5,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/cat2cat.html",
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/polkas/cat2cat",
  "_realowner": "polkas",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.3",
      "date": "2020-09-22"
    },
    {
      "version": "0.1.7",
      "date": "2020-11-29"
    },
    {
      "version": "0.2.0",
      "date": "2020-12-08"
    },
    {
      "version": "0.2.1",
      "date": "2021-03-27"
    },
    {
      "version": "0.3.1",
      "date": "2022-02-06"
    },
    {
      "version": "0.3.3",
      "date": "2022-03-11"
    },
    {
      "version": "0.4.1",
      "date": "2022-06-08"
    },
    {
      "version": "0.4.2",
      "date": "2022-08-05"
    },
    {
      "version": "0.4.4",
      "date": "2022-08-31"
    },
    {
      "version": "0.4.5",
      "date": "2022-09-25"
    },
    {
      "version": "0.4.6",
      "date": "2023-02-01"
    },
    {
      "version": "0.4.7",
      "date": "2024-01-22"
    }
  ],
  "_exports": [
    "cat_apply_freq",
    "cat2cat",
    "cat2cat_agg",
    "cat2cat_ml_run",
    "cross_c2c",
    "dummy_c2c",
    "get_freqs",
    "get_mappings",
    "plot_c2c",
    "prune_c2c",
    "summary_c2c"
  ],
  "_datasets": [
    {
      "name": "occup",
      "title": "Occupational dataset",
      "object": "occup",
      "class": [
        "tbl_df",
        "tbl",
        "data.frame"
      ],
      "fields": [
        "id",
        "age",
        "sex",
        "edu",
        "exp",
        "district",
        "parttime",
        "salary",
        "code",
        "multiplier",
        "year",
        "code4"
      ],
      "rows": 69126,
      "table": true,
      "tojson": true
    },
    {
      "name": "occup_panel",
      "title": "Occupational panel dataset with BAEL-style quarterly rotation",
      "object": "occup_panel",
      "class": [
        "tbl_df",
        "tbl",
        "data.frame"
      ],
      "fields": [
        "id",
        "age",
        "sex",
        "edu",
        "exp",
        "district",
        "parttime",
        "salary",
        "code",
        "multiplier",
        "code4",
        "panel_id",
        "cohort",
        "quarter",
        "year",
        "quarter_num"
      ],
      "rows": 3900,
      "table": true,
      "tojson": true
    },
    {
      "name": "occup_small",
      "title": "Occupational dataset - small one",
      "object": "occup_small",
      "class": [
        "tbl_df",
        "tbl",
        "data.frame"
      ],
      "fields": [
        "id",
        "age",
        "sex",
        "edu",
        "exp",
        "district",
        "parttime",
        "salary",
        "code",
        "multiplier",
        "year",
        "code4"
      ],
      "rows": 8000,
      "table": true,
      "tojson": true
    },
    {
      "name": "trans",
      "title": "trans dataset containing mappings (transitions) between old (2008) and new (2010) occupational codes. This table could be used to map encodings in both directions.",
      "object": "trans",
      "class": [
        "tbl_df",
        "tbl",
        "data.frame"
      ],
      "fields": [
        "old",
        "new"
      ],
      "rows": 2666,
      "table": true,
      "tojson": true
    },
    {
      "name": "verticals",
      "title": "verticals dataset",
      "object": "verticals",
      "class": [
        "data.frame"
      ],
      "fields": [
        "vertical",
        "sales",
        "counts",
        "v_date"
      ],
      "rows": 21,
      "table": true,
      "tojson": true
    },
    {
      "name": "verticals2",
      "title": "verticals2 dataset",
      "object": "verticals2",
      "class": [
        "data.frame"
      ],
      "fields": [
        "ean",
        "vertical",
        "sales",
        "v_date"
      ],
      "rows": 200,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "cat_apply_freq",
      "title": "Applying frequencies to the object returned by the `get_mappings` function",
      "topics": [
        "cat_apply_freq"
      ]
    },
    {
      "page": "cat2cat",
      "title": "Automatic mapping in a panel dataset",
      "topics": [
        "cat2cat"
      ]
    },
    {
      "page": "cat2cat_agg",
      "title": "Manual mapping for an aggregated panel dataset",
      "topics": [
        "cat2cat_agg"
      ]
    },
    {
      "page": "cat2cat_ml_run",
      "title": "Cross-validation diagnostics for cat2cat ML models",
      "topics": [
        "cat2cat_ml_run",
        "print.cat2cat_ml_run"
      ]
    },
    {
      "page": "cross_c2c",
      "title": "Make a combination of weights from different methods",
      "topics": [
        "cross_c2c"
      ]
    },
    {
      "page": "dummy_c2c",
      "title": "Add default cat2cat columns to a `data.frame`",
      "topics": [
        "dummy_c2c"
      ]
    },
    {
      "page": "get_freqs",
      "title": "Getting frequencies from a vector with an optional multiplier",
      "topics": [
        "get_freqs"
      ]
    },
    {
      "page": "get_mappings",
      "title": "Transforming a mapping (transition) table to two associative lists",
      "topics": [
        "get_mappings"
      ]
    },
    {
      "page": "occup",
      "title": "Occupational dataset",
      "topics": [
        "occup"
      ]
    },
    {
      "page": "occup_panel",
      "title": "Occupational panel dataset with BAEL-style quarterly rotation",
      "topics": [
        "occup_panel"
      ]
    },
    {
      "page": "occup_small",
      "title": "Occupational dataset - small one",
      "topics": [
        "occup_small"
      ]
    },
    {
      "page": "plot_c2c",
      "title": "Summary plots for cat2cat results",
      "topics": [
        "plot_c2c"
      ]
    },
    {
      "page": "prune_c2c",
      "title": "Pruning which could be useful after the mapping process",
      "topics": [
        "prune_c2c"
      ]
    },
    {
      "page": "summary_c2c",
      "title": "Adjusted summary for regressions on replicated datasets",
      "topics": [
        "summary_c2c"
      ]
    },
    {
      "page": "trans",
      "title": "trans dataset containing mappings (transitions) between old (2008) and new (2010) occupational codes. This table could be used to map encodings in both directions.",
      "topics": [
        "trans"
      ]
    },
    {
      "page": "verticals",
      "title": "verticals dataset",
      "topics": [
        "verticals"
      ]
    },
    {
      "page": "verticals2",
      "title": "verticals2 dataset",
      "topics": [
        "verticals2"
      ]
    }
  ],
  "_readme": "https://github.com/polkas/cat2cat/raw/HEAD/README.md",
  "_rundeps": [
    "MASS"
  ],
  "_vignettes": [
    {
      "source": "cat2cat_advanced.Rmd",
      "filename": "cat2cat_advanced.html",
      "title": "Advanced Workflows",
      "author": "Maciej Nasinski",
      "engine": "knitr::rmarkdown",
      "headings": [
        "ML weights",
        "Multi-period chaining",
        "Optional: passing mappings$freqs_df",
        "Mapping table stability and truncation",
        "Backward chaining",
        "Forward chaining",
        "Adding ML to the chain",
        "Panel data with subject identifiers",
        "Aggregated data and special cases",
        "Regression on replicated data",
        "Building a 4-period harmonised repeated cross-section dataset",
        "Neutral impact demonstration",
        "Fixed effects regression",
        "Choosing the right advanced workflow",
        "Next steps"
      ],
      "created": "2026-05-08 01:03:21",
      "modified": "2026-05-17 21:11:31",
      "commits": 2
    },
    {
      "source": "cat2cat_validation.Rmd",
      "filename": "cat2cat_validation.html",
      "title": "Choosing Weights and Validating ML",
      "author": "Maciej Nasinski",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Step 1: Understand the competing weight assumptions",
        "If ML fails on some rows: on_fail and fail_warn",
        "Step 2: Check whether conclusions are sensitive to the weight choice",
        "Compare weight methods on the same mapped data",
        "Compare pruning strategies only after comparing full weights",
        "Compare ensemble compositions when no single method dominates",
        "Step 3: Validate whether ML actually improves on simpler baselines",
        "What cat2cat_ml_run() is doing",
        "Minimal validation workflow",
        "Compare multiple ML methods in one run",
        "Inspect per-group diagnostics when methods disagree",
        "Decision rules for interpreting the output"
      ],
      "created": "2026-05-08 01:03:21",
      "modified": "2026-05-17 21:11:31",
      "commits": 2
    },
    {
      "source": "cat2cat.Rmd",
      "filename": "cat2cat.html",
      "title": "Get Started with cat2cat",
      "author": "Maciej Nasinski",
      "engine": "knitr::rmarkdown",
      "headings": [
        "What is cat2cat?",
        "How to read the documentation",
        "The Problem",
        "The Solution",
        "Value Added of cat2cat",
        "Harmonisation Assumptions and Sensitivity",
        "Three weighting schemes",
        "When each assumption matters",
        "Reducing assumption dependence",
        "When cat2cat won't help",
        "If you need something more advanced",
        "Use Cases",
        "Key Concepts",
        "Direction: Backward vs Forward",
        "ML weights in practice",
        "Weights",
        "Quick Example",
        "Forward mapping example",
        "Naive vs Frequency Weights",
        "Value added: pooled regression across both periods",
        "Diagnostic Plot",
        "Mapping table",
        "Common hierarchical classifications",
        "Truncating mapping tables from hierarchical codes",
        "Learn More"
      ],
      "created": "2022-03-10 18:40:16",
      "modified": "2026-05-17 21:11:31",
      "commits": 34
    }
  ],
  "_score": 5.176091259055681,
  "_indexed": true,
  "_nocasepkg": "cat2cat",
  "_universes": [
    "polkas"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.6.1.9000",
      "date": "2026-05-17T22:03:25.000Z",
      "distro": "noble",
      "commit": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
      "fileid": "be9aaeea7572278d0b2c1314f542c088109b1e31dd7d58c7961069454eabdeaf",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.6.1.9000",
      "date": "2026-05-17T22:03:27.000Z",
      "distro": "noble",
      "commit": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
      "fileid": "cc6a909a67dc31c28225263ed0b60aa612709f687413e435cf6475995c96f442",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.6.1.9000",
      "date": "2026-05-17T22:02:29.000Z",
      "commit": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
      "fileid": "9977aa16bce1e371dff20d27a7120e8f0f72bd6253151fec9bfaadb540dd8eb6",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.6.1.9000",
      "date": "2026-05-17T22:03:02.000Z",
      "commit": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
      "fileid": "5c8819bfc96e6878f3bf21f83d4d4c4d7c99d13a6226542bb46afd1b4639f82e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.6.1.9000",
      "date": "2026-05-17T22:03:05.000Z",
      "commit": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
      "fileid": "e9eefa280b26ad013e4d662cead45a72de247c668bef6f156483589970b9cb45",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.6.1.9000",
      "date": "2026-05-17T22:02:28.000Z",
      "commit": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
      "fileid": "3057d61b9930d4dc13ac81ab3663258b5df0a23183dfc14a41fe517037b4a22f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.6.1.9000",
      "date": "2026-05-17T22:02:28.000Z",
      "commit": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
      "fileid": "2e828e32705264ad06fc44e2f2d4ea3db5b99270fe60a59a175559ad45c3eef9",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.6.1.9000",
      "date": "2026-06-02T16:52:51.000Z",
      "commit": "e1f0c6368acdd088ad08223b630fa3e0cf40fe6d",
      "fileid": "29027a534157e30ab906bf54c38e73fe8fb7c1599e46ef27b84d0e921304884e",
      "status": "success",
      "buildurl": "https://github.com/r-universe/polkas/actions/runs/26003846938"
    }
  ]
}