{"id":"https://openalex.org/W4399554654","doi":"https://doi.org/10.48550/arxiv.2406.05264","title":"\"Minus-One\" Data Prediction Generates Synthetic Census Data with Good Crosstabulation Fidelity","display_name":"\"Minus-One\" Data Prediction Generates Synthetic Census Data with Good Crosstabulation Fidelity","publication_year":2024,"publication_date":"2024-06-07","ids":{"openalex":"https://openalex.org/W4399554654","doi":"https://doi.org/10.48550/arxiv.2406.05264"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2406.05264","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.05264","pdf_url":"https://arxiv.org/pdf/2406.05264","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.05264","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021973169","display_name":"William H. Press","orcid":"https://orcid.org/0000-0003-0771-0841"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Press, William H.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5021973169"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11980","display_name":"Human Mobility and Location-Based Analysis","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/3313","display_name":"Transportation"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11980","display_name":"Human Mobility and Location-Based Analysis","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/3313","display_name":"Transportation"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11344","display_name":"Traffic Prediction and Management Techniques","score":0.9089000225067139,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/census","display_name":"Census","score":0.7784335613250732},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.7287039756774902},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5356046557426453},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4711792767047882},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4519176781177521},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.38682612776756287},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.3298454284667969},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.32150140404701233},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29255467653274536},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24032965302467346},{"id":"https://openalex.org/keywords/demography","display_name":"Demography","score":0.10905042290687561},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07001069188117981}],"concepts":[{"id":"https://openalex.org/C52130261","wikidata":"https://www.wikidata.org/wiki/Q39825","display_name":"Census","level":3,"score":0.7784335613250732},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.7287039756774902},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5356046557426453},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4711792767047882},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4519176781177521},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.38682612776756287},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3298454284667969},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32150140404701233},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29255467653274536},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24032965302467346},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.10905042290687561},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07001069188117981},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2406.05264","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.05264","pdf_url":"https://arxiv.org/pdf/2406.05264","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2406.05264","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2406.05264","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2406.05264","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.05264","pdf_url":"https://arxiv.org/pdf/2406.05264","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399554654.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2128472366","https://openalex.org/W621243299","https://openalex.org/W5594354","https://openalex.org/W4244351752","https://openalex.org/W2601163983","https://openalex.org/W2364090708","https://openalex.org/W2145323372","https://openalex.org/W1512152715","https://openalex.org/W4384517922","https://openalex.org/W4377088509"],"abstract_inverted_index":{"We":[0,105],"propose":[1],"to":[2,39,56,70,85,109,113],"capture":[3],"relevant":[4],"statistical":[5],"associations":[6],"in":[7],"a":[8,15,22,66,87],"dataset":[9],"of":[10,60,90,103,117],"categorical":[11],"survey":[12],"responses":[13,78],"by":[14],"method,":[16],"here":[17],"termed":[18],"MODP,":[19],"that":[20],"\"learns\"":[21],"probabilistic":[23],"prediction":[24],"function":[25],"L.":[26],"Specifically,":[27],"L":[28,68],"predicts":[29],"each":[30],"question's":[31],"response":[32],"based":[33],"on":[34],"the":[35,41,46,57,111,115,118],"same":[36],"respondent's":[37],"answers":[38],"all":[40,93],"other":[42],"questions.":[43],"Draws":[44],"from":[45],"resulting":[47],"probability":[48],"distribution":[49],"become":[50],"synthetic":[51,77],"responses.":[52],"Applying":[53],"this":[54],"methodology":[55],"PUMS":[58],"subset":[59],"Census":[61],"ACS":[62],"data,":[63],"and":[64,107],"with":[65,96],"learned":[67],"akin":[69],"multiple":[71],"parallel":[72],"logistic":[73],"regression,":[74],"we":[75],"generate":[76],"whose":[79],"crosstabulations":[80],"(two-point":[81],"conditionals)":[82],"are":[83],"found":[84],"have":[86],"median":[88],"accuracy":[89],"~5%":[91],"across":[92],"crosstabulation":[94],"cells,":[95],"cell":[97],"counts":[98],"ranging":[99],"over":[100],"four":[101],"orders":[102],"magnitude.":[104],"investigate":[106],"attempt":[108],"quantify":[110],"degree":[112],"which":[114],"privacy":[116],"original":[119],"data":[120],"is":[121],"protected.":[122]},"counts_by_year":[],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2024-06-12T00:00:00"}
