{"id":"https://openalex.org/W2980523701","doi":"https://doi.org/10.1145/3352631.3352638","title":"okralact - a multi-engine Open Source OCR training system","display_name":"okralact - a multi-engine Open Source OCR training system","publication_year":2019,"publication_date":"2019-09-20","ids":{"openalex":"https://openalex.org/W2980523701","doi":"https://doi.org/10.1145/3352631.3352638","mag":"2980523701"},"language":"en","primary_location":{"id":"doi:10.1145/3352631.3352638","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3352631.3352638","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th International Workshop on Historical Document Imaging and Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061484718","display_name":"Konstantin Baierer","orcid":"https://orcid.org/0000-0003-2397-242X"},"institutions":[{"id":"https://openalex.org/I2802918869","display_name":"Berlin State Library","ror":"https://ror.org/02ysgg478","country_code":"DE","type":"archive","lineage":["https://openalex.org/I2800703586","https://openalex.org/I2802918869"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Konstantin Baierer","raw_affiliation_strings":["Staatsbibliothek zu Berlin, Preu\u00dfischer Kulturbesitz"],"affiliations":[{"raw_affiliation_string":"Staatsbibliothek zu Berlin, Preu\u00dfischer Kulturbesitz","institution_ids":["https://openalex.org/I2802918869"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101426785","display_name":"Rui Dong","orcid":"https://orcid.org/0009-0006-7784-0050"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rui Dong","raw_affiliation_strings":["Khoury College of Computer Sciences, Northeastern University"],"affiliations":[{"raw_affiliation_string":"Khoury College of Computer Sciences, Northeastern University","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060385793","display_name":"Clemens Neudecker","orcid":"https://orcid.org/0000-0001-5293-8322"},"institutions":[{"id":"https://openalex.org/I2802918869","display_name":"Berlin State Library","ror":"https://ror.org/02ysgg478","country_code":"DE","type":"archive","lineage":["https://openalex.org/I2800703586","https://openalex.org/I2802918869"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Clemens Neudecker","raw_affiliation_strings":["Staatsbibliothek zu Berlin, Preu\u00dfischer Kulturbesitz"],"affiliations":[{"raw_affiliation_string":"Staatsbibliothek zu Berlin, Preu\u00dfischer Kulturbesitz","institution_ids":["https://openalex.org/I2802918869"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5061484718"],"corresponding_institution_ids":["https://openalex.org/I2802918869"],"apc_list":null,"apc_paid":null,"fwci":0.2024,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.5457958,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"25","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.821448564529419},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.7163205742835999},{"id":"https://openalex.org/keywords/standardization","display_name":"Standardization","score":0.5674991607666016},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5653430223464966},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.532605767250061},{"id":"https://openalex.org/keywords/interoperability","display_name":"Interoperability","score":0.5279720425605774},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.42667919397354126},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42472490668296814},{"id":"https://openalex.org/keywords/orthography","display_name":"Orthography","score":0.418582022190094},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38209259510040283},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3719246983528137},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.31750088930130005},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13312307000160217},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08556103706359863}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.821448564529419},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.7163205742835999},{"id":"https://openalex.org/C188087704","wikidata":"https://www.wikidata.org/wiki/Q369577","display_name":"Standardization","level":2,"score":0.5674991607666016},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5653430223464966},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.532605767250061},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.5279720425605774},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.42667919397354126},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42472490668296814},{"id":"https://openalex.org/C150670947","wikidata":"https://www.wikidata.org/wiki/Q43091","display_name":"Orthography","level":3,"score":0.418582022190094},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38209259510040283},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3719246983528137},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.31750088930130005},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13312307000160217},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08556103706359863},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3352631.3352638","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3352631.3352638","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th International Workshop on Historical Document Imaging and Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G922498317","display_name":null,"funder_award_id":"274863866","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W114517082","https://openalex.org/W1980287119","https://openalex.org/W1989737761","https://openalex.org/W1993557076","https://openalex.org/W2060580591","https://openalex.org/W2099106019","https://openalex.org/W2127141656","https://openalex.org/W2163605009","https://openalex.org/W2268547524","https://openalex.org/W2480733761","https://openalex.org/W2741428179","https://openalex.org/W2768817781","https://openalex.org/W2785875661","https://openalex.org/W2787361829","https://openalex.org/W2962936306","https://openalex.org/W2981353851","https://openalex.org/W2981597140","https://openalex.org/W3013597222","https://openalex.org/W3106148001"],"related_works":["https://openalex.org/W2349956239","https://openalex.org/W2378767206","https://openalex.org/W2522162613","https://openalex.org/W1540871478","https://openalex.org/W328308450","https://openalex.org/W2393217036","https://openalex.org/W282641168","https://openalex.org/W2376963063","https://openalex.org/W4206957170","https://openalex.org/W2066396794"],"abstract_inverted_index":{"Optical":[0],"character":[1],"recognition":[2],"(OCR)":[3],"of":[4,14,24,29,47,89,105,122,128,147,158],"historical":[5,178],"documents":[6,90,179],"has":[7],"been":[8],"significantly":[9],"more":[10],"difficult":[11],"than":[12],"OCR":[13,37,65,75,136,170,176,186],"modern":[15,48],"texts":[16,31],"largely":[17],"due":[18],"to":[19,60,86,174,181],"idiosyncrasies":[20],"and":[21,112,124,156,160,180],"wide":[22],"variability":[23],"font,":[25],"layout,":[26],"language,":[27],"orthography":[28],"printed":[30],"before":[32],"ca.":[33],"1850.":[34],"However,":[35],"traditional":[36],"engines":[38,76,101,137,171],"were":[39],"optimized":[40],"towards":[41],"supporting":[42],"the":[43,58,62,154,182],"widest":[44],"possible":[45],"set":[46,121],"text":[49],"(\"OmniFont":[50],"OCR\")":[51],"with":[52],"little":[53],"or":[54,142],"no":[55],"facilities":[56],"for":[57,132,166,177],"user":[59],"adapt":[61],"engine.":[63],"Since":[64],"technologies":[66],"began":[67],"embracing":[68],"deep":[69],"neural":[70],"networks,":[71],"various":[72],"Free":[73],"Software":[74],"are":[77],"now":[78],"available":[79],"that":[80],"can":[81,172],"in":[82,103,110],"principle":[83],"be":[84],"adapted":[85],"different":[87],"types":[88],"by":[91],"training":[92,133,146],"specific":[93],"models":[94],"from":[95],"ground":[96],"truth":[97],"(GT).":[98],"What":[99],"these":[100,148],"offer":[102],"terms":[104],"implementation":[106,127],"finesse,":[107],"they":[108],"lack":[109],"interoperability":[111],"standardization.":[113],"To":[114],"overcome":[115],"this,":[116],"we":[117],"developed":[118],"okralact,":[119],"a":[120,125,163],"specifications":[123,155],"prototypical":[126],"an":[129],"engine-agnostic":[130],"system":[131,165],"Open":[134,168,184],"Source":[135,169,185],"like":[138],"Tesseract,":[139],"OCRopus,":[140],"kraken":[141],"Calamari.":[143],"We":[144],"discuss":[145],"engines,":[149],"compare":[150],"their":[151],"features,":[152],"describe":[153],"functionality":[157],"okralact":[159],"outline":[161],"how":[162],"turn-key":[164],"adapting":[167],"contribute":[173],"better":[175],"general":[183],"ecosystem.":[187]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
