{"id":"https://openalex.org/W4406458302","doi":"https://doi.org/10.1109/bigdata62323.2024.10825435","title":"Sifting US Census Records with Computer Vision and Machine Learning","display_name":"Sifting US Census Records with Computer Vision and Machine Learning","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458302","doi":"https://doi.org/10.1109/bigdata62323.2024.10825435"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825435","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005700121","display_name":"Gregory Jansen","orcid":"https://orcid.org/0000-0001-6591-6595"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Gregory N. Jansen","raw_affiliation_strings":["University of Maryland at College Park,School of Information,College Park,United States"],"affiliations":[{"raw_affiliation_string":"University of Maryland at College Park,School of Information,College Park,United States","institution_ids":["https://openalex.org/I66946132"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5005700121"],"corresponding_institution_ids":["https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26638599,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2431","last_page":"2439"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9510999917984009,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9510999917984009,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9376999735832214,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9337000250816345,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/census","display_name":"Census","score":0.8251557350158691},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7020998001098633},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4954572916030884},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.40383318066596985},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36304837465286255},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3370603322982788},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.32698941230773926},{"id":"https://openalex.org/keywords/demography","display_name":"Demography","score":0.07355654239654541},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.053517550230026245}],"concepts":[{"id":"https://openalex.org/C52130261","wikidata":"https://www.wikidata.org/wiki/Q39825","display_name":"Census","level":3,"score":0.8251557350158691},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7020998001098633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4954572916030884},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40383318066596985},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36304837465286255},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3370603322982788},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32698941230773926},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.07355654239654541},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.053517550230026245},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825435","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2000782346","https://openalex.org/W2003213232","https://openalex.org/W2106990937","https://openalex.org/W2594899909","https://openalex.org/W2734358244","https://openalex.org/W2970354824","https://openalex.org/W3035558131","https://openalex.org/W4206608019","https://openalex.org/W6758161276"],"related_works":["https://openalex.org/W2128472366","https://openalex.org/W621243299","https://openalex.org/W5594354","https://openalex.org/W4244351752","https://openalex.org/W2601163983","https://openalex.org/W2364090708","https://openalex.org/W2145323372","https://openalex.org/W1512152715","https://openalex.org/W2009948611","https://openalex.org/W2182026161"],"abstract_inverted_index":{"This":[0,75,158],"paper":[1,271],"shares":[2],"the":[3,39,55,64,81,89,92,96,104,115,130,140,156,161,181,190,207,232,264,270],"culmination":[4],"of":[5,84,91,95,99,228],"my":[6],"work":[7],"to":[8,13,32,79,153,179,223,249,259],"computationally":[9],"enhance":[10],"researcher":[11,82,222],"access":[12],"U.":[14],"S.":[15],"Census":[16],"records,":[17],"by":[18],"targeting":[19],"their":[20,186,250],"personal":[21],"transcription":[22],"labor":[23],"on":[24,38,58,67],"those":[25,243],"document":[26],"pages":[27,246],"that":[28,48,149,164,219,247,256],"are":[29,63,165,194,267,273],"most":[30],"likely":[31],"contain":[33],"relevant":[34],"information.":[35,74],"Much":[36],"research":[37],"United":[40],"States":[41],"population":[42,60,177,244],"over":[43],"time":[44],"concerns":[45],"demographic":[46],"groups":[47],"may":[49,150],"be":[50,151],"identified,":[51],"for":[52,175,205,276],"example,":[53],"through":[54,110],"race":[56,187,209],"column":[57],"census":[59,69,131],"schedules,":[61],"which":[62,68],"handwritten":[65,208],"forms":[66,132],"takers":[70],"would":[71],"record":[72],"household":[73],"project":[76],"was":[77],"created":[78,215],"support":[80],"efforts":[83],"Dr.":[85],"Richard":[86],"Marciano":[87],"and":[88,119,124,147,160,197,235,263,272],"study":[90],"community":[93],"impact":[94],"forced":[97],"relocation":[98],"Japanese":[100,116],"American":[101,117],"households":[102,118],"during":[103],"second":[105],"world":[106],"war.":[107],"In":[108],"particular":[109],"a":[111,134,171,216,221,225,238,279],"detailed":[112],"comparison":[113],"between":[114],"people":[120],"recorded":[121],"in":[122,125,137],"1940":[123],"1950":[126],"Sacramento":[127],"California.":[128],"While":[129],"have":[133],"different":[135,201],"layout":[136],"each":[138],"decade,":[139],"general":[141],"design":[142],"is":[143],"tabular":[144],"with":[145,168],"rows":[146],"columns":[148],"used":[152,258],"visually":[154],"segment":[155],"document.":[157],"paper,":[159],"code":[162,210,254],"notebooks":[163,255],"published":[166],"along":[167],"it,":[169],"demonstrate":[170],"computer":[172],"vision":[173],"technique":[174],"segmenting":[176],"schedules":[178],"extract":[180],"individual":[182,191],"cell":[183,192],"images":[184,193],"from":[185,231],"column.":[188],"Then":[189],"cleaned":[195],"up":[196],"fed":[198],"into":[199],"two":[200],"neural":[202],"network":[203],"models,":[204],"identifying":[206],"within":[211,269],"them.":[212],"Finally,":[213],"we":[214],"user":[217],"interface":[218],"allows":[220],"perform":[224,260],"visual":[226],"review":[227,265],"uncertain":[229],"results":[230],"above":[233],"process":[234,266],"thereby":[236],"create":[237],"reliable":[239],"dataset":[240],"containing":[241],"only":[242],"schedule":[245],"pertain":[248],"research.":[251],"The":[252],"Python":[253],"were":[257],"this":[261],"analysis":[262],"linked":[268],"freely":[274],"available":[275],"reuse":[277],"under":[278],"Creative":[280],"Commons":[281],"share-alike":[282],"license.":[283]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
