{"id":"https://openalex.org/W4327727990","doi":"https://doi.org/10.1007/s10032-023-00428-9","title":"An end-to-end pipeline for historical censuses processing","display_name":"An end-to-end pipeline for historical censuses processing","publication_year":2023,"publication_date":"2023-03-17","ids":{"openalex":"https://openalex.org/W4327727990","doi":"https://doi.org/10.1007/s10032-023-00428-9"},"language":"en","primary_location":{"id":"doi:10.1007/s10032-023-00428-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10032-023-00428-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10032-023-00428-9.pdf","source":{"id":"https://openalex.org/S90108747","display_name":"International Journal on Document Analysis and Recognition (IJDAR)","issn_l":"1433-2825","issn":["1433-2825","1433-2833"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Document Analysis and Recognition (IJDAR)","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10032-023-00428-9.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041741621","display_name":"R\u00e9mi Petitpierre","orcid":"https://orcid.org/0000-0001-9138-6727"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"R\u00e9mi Petitpierre","raw_affiliation_strings":["Digital Humanities Institute, EPFL, Lausanne, Switzerland"],"raw_orcid":"https://orcid.org/0000-0001-9138-6727","affiliations":[{"raw_affiliation_string":"Digital Humanities Institute, EPFL, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041555223","display_name":"Marion Kramer","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Marion Kramer","raw_affiliation_strings":["Digital Humanities Institute, EPFL, Lausanne, Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Digital Humanities Institute, EPFL, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002901633","display_name":"Lucas Rappo","orcid":"https://orcid.org/0000-0002-7172-2495"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Lucas Rappo","raw_affiliation_strings":["Digital Humanities Institute, EPFL, Lausanne, Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-7172-2495","affiliations":[{"raw_affiliation_string":"Digital Humanities Institute, EPFL, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041741621"],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.683,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.70303844,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"26","issue":"4","first_page":"419","last_page":"432"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9836000204086304,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7981200814247131},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.545146644115448},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5447022914886475},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.541662871837616},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5023477077484131},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4764002859592438},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.4369153082370758},{"id":"https://openalex.org/keywords/row","display_name":"Row","score":0.41651245951652527},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37036415934562683},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34590643644332886},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1309734582901001}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7981200814247131},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.545146644115448},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5447022914886475},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.541662871837616},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5023477077484131},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4764002859592438},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.4369153082370758},{"id":"https://openalex.org/C135598885","wikidata":"https://www.wikidata.org/wiki/Q1366302","display_name":"Row","level":2,"score":0.41651245951652527},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37036415934562683},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34590643644332886},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1309734582901001},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1007/s10032-023-00428-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10032-023-00428-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10032-023-00428-9.pdf","source":{"id":"https://openalex.org/S90108747","display_name":"International Journal on Document Analysis and Recognition (IJDAR)","issn_l":"1433-2825","issn":["1433-2825","1433-2833"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Document Analysis and Recognition (IJDAR)","raw_type":"journal-article"},{"id":"pmh:oai:infoscience.epfl.ch:301381","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/196345","pdf_url":"https://infoscience.epfl.ch/bitstreams/2025b54e-2dbc-48e3-91fa-95c77b8f8e61/download","source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"research article"},{"id":"pmh:oai:iris.unil.ch:iris/68946","is_oa":false,"landing_page_url":"https://iris.unil.ch/handle/iris/68946","pdf_url":null,"source":{"id":"https://openalex.org/S7407055444","display_name":"IRIS","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"journal article"},{"id":"pmh:oai:serval.unil.ch:BIB_121BD75D0260","is_oa":false,"landing_page_url":"https://serval.unil.ch/notice/serval:BIB_121BD75D0260","pdf_url":null,"source":{"id":"https://openalex.org/S4306401797","display_name":"SERVAL (Universit\u00e9 de Lausanne)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210093590","host_organization_name":"Swiss School of Archaeology in Greece","host_organization_lineage":["https://openalex.org/I4210093590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"International Journal on Document Analysis and Recognition (IJDAR), vol. 26, no. 4, pp. 419-432","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s10032-023-00428-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10032-023-00428-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10032-023-00428-9.pdf","source":{"id":"https://openalex.org/S90108747","display_name":"International Journal on Document Analysis and Recognition (IJDAR)","issn_l":"1433-2825","issn":["1433-2825","1433-2833"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Document Analysis and Recognition (IJDAR)","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4327727990.pdf"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1902237438","https://openalex.org/W1903029394","https://openalex.org/W1993557076","https://openalex.org/W2001412060","https://openalex.org/W2073382397","https://openalex.org/W2095414072","https://openalex.org/W2104110038","https://openalex.org/W2133059825","https://openalex.org/W2170526995","https://openalex.org/W2233008261","https://openalex.org/W2508794233","https://openalex.org/W2560257216","https://openalex.org/W2785820430","https://openalex.org/W2786162033","https://openalex.org/W2786219411","https://openalex.org/W2786496645","https://openalex.org/W2786672397","https://openalex.org/W2786974559","https://openalex.org/W2787018495","https://openalex.org/W2787480186","https://openalex.org/W2787791489","https://openalex.org/W2795753477","https://openalex.org/W2798826627","https://openalex.org/W2905427852","https://openalex.org/W2943866203","https://openalex.org/W2963868896","https://openalex.org/W2964322605","https://openalex.org/W2980063486","https://openalex.org/W2988898388","https://openalex.org/W3003494340","https://openalex.org/W3004376248","https://openalex.org/W3011228495","https://openalex.org/W3055666129","https://openalex.org/W3088868295","https://openalex.org/W3096631793","https://openalex.org/W3102644800","https://openalex.org/W3105988348","https://openalex.org/W3119108181","https://openalex.org/W3157470726","https://openalex.org/W3159314278","https://openalex.org/W3202466114","https://openalex.org/W3216106108","https://openalex.org/W4246722913","https://openalex.org/W4393802888","https://openalex.org/W4393811435","https://openalex.org/W6600804061","https://openalex.org/W6676179485","https://openalex.org/W6702248584"],"related_works":["https://openalex.org/W3121299875","https://openalex.org/W1566754092","https://openalex.org/W3007479582","https://openalex.org/W3123744736","https://openalex.org/W953130165","https://openalex.org/W1996250799","https://openalex.org/W4214886263","https://openalex.org/W4388446985","https://openalex.org/W4241194350","https://openalex.org/W2216243182"],"abstract_inverted_index":{"Abstract":[0],"Censuses":[1],"are":[2],"structured":[3],"documents":[4],"of":[5,24,30,60,78,93,113,149,154,160,166,181],"great":[6],"value":[7],"for":[8,74],"social":[9],"and":[10,26,37,128,152,163,172,218],"demographic":[11],"history,":[12],"which":[13,108],"became":[14],"widespread":[15],"from":[16,90],"the":[17,22,27,58,79,91,94,111,114,117,134,150,155,158,164,178],"nineteenth":[18,189],"century":[19,190],"on.":[20],"However,":[21],"plurality":[23],"formats":[25],"natural":[28],"variability":[29],"historical":[31,208],"data":[32],"make":[33,140],"their":[34],"extraction":[35],"arduous":[36],"often":[38],"lead":[39],"to":[40,56,121,131,143,169],"ungeneric":[41],"recognition":[42,77],"algorithms.":[43],"We":[44],"propose":[45,120],"an":[46,54,104],"end-to-end":[47],"processing":[48],"pipeline,":[49],"based":[50,67],"on":[51,68,186],"optimization,":[52],"in":[53,200],"attempt":[55],"reduce":[57],"number":[59],"free":[61],"parameters.":[62],"The":[63,83,97],"layout":[64],"analysis":[65],"is":[66,87,101,215],"semantic":[69],"segmentation":[70],"using":[71],"neural":[72,126],"networks":[73],"a":[75,204],"generic":[76],"explicit":[80],"column":[81],"structure.":[82],"implicit":[84],"row":[85],"structure":[86],"deduced":[88],"directly":[89],"position":[92],"text":[95,99],"segments.":[96],"handwritten":[98],"detection":[100],"complemented":[102],"by":[103],"intelligent":[105],"framing":[106],"method":[107],"significantly":[109],"improves":[110],"quality":[112],"HTR.":[115],"In":[116,192],"end,":[118],"we":[119],"combine":[122],"several":[123],"post-correction":[124,173],"approaches,":[125],"networks,":[127],"language":[129],"models,":[130],"further":[132],"improve":[133],"performance.":[135],"Ultimately,":[136],"our":[137],"flexible":[138],"methods":[139],"it":[141],"possible":[142],"accurately":[144],"detect":[145],"more":[146,194],"than":[147,195],"98%":[148],"columns":[151],"88%":[153],"rows,":[156],"despite":[157],"lack":[159],"graphical":[161],"separator":[162],"diversity":[165],"formats.":[167],"Thanks":[168],"various":[170],"reframing":[171],"strategies,":[174],"HTR":[175],"results":[176],"reach":[177],"excellent":[179],"performance":[180],"3.44%":[182],"character":[183],"error":[184],"rate":[185],"these":[187],"noisy":[188],"data.":[191],"total,":[193],"18,831":[196],"pages":[197],"were":[198],"extracted":[199],"72":[201],"censuses":[202],"over":[203],"century.":[205],"This":[206],"large":[207],"dataset,":[209],"as":[210,212],"well":[211],"training":[213],"data,":[214],"made":[216],"open-access":[217],"released":[219],"along":[220],"with":[221],"this":[222],"article.":[223]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
