{"id":"https://openalex.org/W2737639484","doi":"https://doi.org/10.1145/3078081.3078107","title":"Poor Man's OCR Post-Correction","display_name":"Poor Man's OCR Post-Correction","publication_year":2017,"publication_date":"2017-06-01","ids":{"openalex":"https://openalex.org/W2737639484","doi":"https://doi.org/10.1145/3078081.3078107","mag":"2737639484"},"language":"en","primary_location":{"id":"doi:10.1145/3078081.3078107","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3078081.3078107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007975524","display_name":"Harald Hammarstr\u00f6m","orcid":"https://orcid.org/0000-0003-0120-6396"},"institutions":[{"id":"https://openalex.org/I123387679","display_name":"Uppsala University","ror":"https://ror.org/048a87296","country_code":"SE","type":"education","lineage":["https://openalex.org/I123387679"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Harald Hammarstr\u00f6m","raw_affiliation_strings":["Department of Linguistics and Philology, Uppsala University, Uppsala, Sweden"],"affiliations":[{"raw_affiliation_string":"Department of Linguistics and Philology, Uppsala University, Uppsala, Sweden","institution_ids":["https://openalex.org/I123387679"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085555031","display_name":"Shafqat Mumtaz Virk","orcid":"https://orcid.org/0000-0002-5030-9191"},"institutions":[{"id":"https://openalex.org/I881427289","display_name":"University of Gothenburg","ror":"https://ror.org/01tm6cn81","country_code":"SE","type":"education","lineage":["https://openalex.org/I881427289"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Shafqat Mumtaz Virk","raw_affiliation_strings":["Spr\u00e5kbanken, University of Gothenburg, Gothenburg, Sweden"],"affiliations":[{"raw_affiliation_string":"Spr\u00e5kbanken, University of Gothenburg, Gothenburg, Sweden","institution_ids":["https://openalex.org/I881427289"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063605821","display_name":"Markus Forsberg","orcid":"https://orcid.org/0000-0002-0335-8142"},"institutions":[{"id":"https://openalex.org/I881427289","display_name":"University of Gothenburg","ror":"https://ror.org/01tm6cn81","country_code":"SE","type":"education","lineage":["https://openalex.org/I881427289"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Markus Forsberg","raw_affiliation_strings":["Spr\u00e5kbanken, University of Gothenburg, Gothenburg, Sweden"],"affiliations":[{"raw_affiliation_string":"Spr\u00e5kbanken, University of Gothenburg, Gothenburg, Sweden","institution_ids":["https://openalex.org/I881427289"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007975524"],"corresponding_institution_ids":["https://openalex.org/I123387679"],"apc_list":null,"apc_paid":null,"fwci":0.2051,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.56533637,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"71","last_page":"75"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.8400614261627197},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8372650146484375},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7023428678512573},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.693550169467926},{"id":"https://openalex.org/keywords/orthography","display_name":"Orthography","score":0.6920057535171509},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.645641565322876},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.641007661819458},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5946769714355469},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.53049635887146},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.47983595728874207},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36964547634124756},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.11961635947227478},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07731670141220093}],"concepts":[{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.8400614261627197},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8372650146484375},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7023428678512573},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.693550169467926},{"id":"https://openalex.org/C150670947","wikidata":"https://www.wikidata.org/wiki/Q43091","display_name":"Orthography","level":3,"score":0.6920057535171509},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.645641565322876},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.641007661819458},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5946769714355469},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.53049635887146},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.47983595728874207},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36964547634124756},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.11961635947227478},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07731670141220093},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3078081.3078107","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3078081.3078107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd International Conference on Digital Access to Textual Cultural Heritage","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2349956239","https://openalex.org/W2522162613","https://openalex.org/W2393217036","https://openalex.org/W4206957170","https://openalex.org/W2184509518","https://openalex.org/W2080283158","https://openalex.org/W3183814539","https://openalex.org/W4243842732","https://openalex.org/W4388778088","https://openalex.org/W2944691285"],"abstract_inverted_index":{"The":[0,119],"accuracy":[1,51],"of":[2,14,24,31],"Optical":[3],"Character":[4],"Recognition":[5],"(OCR)":[6],"is":[7,121],"sets":[8],"the":[9,12,29,56,111,114,117],"limit":[10],"for":[11,63],"success":[13],"subsequent":[15],"applications":[16],"used":[17],"in":[18,82],"text":[19,33],"analyzing":[20],"pipeline.":[21],"Recent":[22],"models":[23],"OCR":[25,64],"postprocessing":[26],"significantly":[27],"improve":[28],"quality":[30],"OCR-generated":[32],"but":[34],"require":[35],"engineering":[36],"work":[37],"or":[38,44,72,106],"resources":[39,71],"such":[40,50,98],"as":[41,110],"human-labeled":[42],"data":[43],"a":[45,61,126,131],"dictionary":[46],"to":[47,103],"perform":[48],"with":[49,69],"on":[52],"novel":[53],"datasets.":[54],"In":[55,76],"present":[57],"paper":[58],"we":[59],"introduce":[60],"technique":[62],"post-processing":[65],"that":[66,84],"runs":[67],"off-the-shelf":[68],"no":[70],"parameter":[73],"tuning":[74],"required.":[75],"essence,":[77],"words":[78],"which":[79],"are":[80,85,94],"similar":[81,89],"form":[83],"also":[86],"distributionally":[87],"more":[88],"than":[90],"expected":[91],"at":[92,116],"random":[93],"deemed":[95],"OCR-variants.":[96],"As":[97],"it":[99],"can":[100],"be":[101],"applied":[102],"any":[104],"language":[105,115],"genre":[107],"(as":[108],"long":[109],"orthography":[112],"segments":[113],"word-level).":[118],"algorithm":[120],"illustrated":[122],"and":[123,130],"evaluated":[124],"using":[125],"multilingual":[127],"document":[128],"collection":[129],"benchmark":[132],"English":[133],"dataset.":[134]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
