{"id":"https://openalex.org/W3150939841","doi":"https://doi.org/10.25972/opus-20923","title":"An Intelligent Semi-Automatic Workflow for Optical Character Recognition of Historical Printings","display_name":"An Intelligent Semi-Automatic Workflow for Optical Character Recognition of Historical Printings","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3150939841","doi":"https://doi.org/10.25972/opus-20923","mag":"3150939841"},"language":"en","primary_location":{"id":"pmh:oai:aleph.bib-bvb.de:BVB01-032444986","is_oa":false,"landing_page_url":"https://doi.org/10.25972/OPUS-20923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://opus.bibliothek.uni-wuerzburg.de/frontdoor/index/index/docId/20923","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063938010","display_name":"Christian Reul","orcid":"https://orcid.org/0000-0002-1776-1469"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Reul, Christian","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5063938010"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0977,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.4450158,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9736999869346619,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9713000059127808,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7799336910247803},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.7621283531188965},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.758459210395813},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5306897163391113},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5100001096725464},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46592068672180176},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.44898679852485657},{"id":"https://openalex.org/keywords/graphical-user-interface","display_name":"Graphical user interface","score":0.4299575388431549},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16122299432754517},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.12585419416427612},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.11820900440216064}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7799336910247803},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.7621283531188965},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.758459210395813},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5306897163391113},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5100001096725464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46592068672180176},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.44898679852485657},{"id":"https://openalex.org/C37789001","wikidata":"https://www.wikidata.org/wiki/Q782543","display_name":"Graphical user interface","level":2,"score":0.4299575388431549},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16122299432754517},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.12585419416427612},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.11820900440216064}],"mesh":[],"locations_count":4,"locations":[{"id":"pmh:oai:aleph.bib-bvb.de:BVB01-032444986","is_oa":false,"landing_page_url":"https://doi.org/10.25972/OPUS-20923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:opus.bibliothek.uni-wuerzburg.de:20923","is_oa":true,"landing_page_url":"https://opus.bibliothek.uni-wuerzburg.de/frontdoor/index/index/docId/20923","pdf_url":null,"source":{"id":"https://openalex.org/S4306401635","display_name":"Online Publication Service of W\u00fcrzburg University (W\u00fcrzburg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I25974101","host_organization_name":"University of W\u00fcrzburg","host_organization_lineage":["https://openalex.org/I25974101"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"doctoralthesis"},{"id":"doi:10.25972/opus-20923","is_oa":true,"landing_page_url":"https://doi.org/10.25972/opus-20923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400930","display_name":"OPUS (Augsburg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I119916105","host_organization_name":"Augsburg University","host_organization_lineage":["https://openalex.org/I119916105"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"thesis"},{"id":"mag:3150939841","is_oa":false,"landing_page_url":"https://opus.bibliothek.uni-wuerzburg.de/files/20923/Reul_Christian_Dissertation.pdf","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:opus.bibliothek.uni-wuerzburg.de:20923","is_oa":true,"landing_page_url":"https://opus.bibliothek.uni-wuerzburg.de/frontdoor/index/index/docId/20923","pdf_url":null,"source":{"id":"https://openalex.org/S4306401635","display_name":"Online Publication Service of W\u00fcrzburg University (W\u00fcrzburg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I25974101","host_organization_name":"University of W\u00fcrzburg","host_organization_lineage":["https://openalex.org/I25974101"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"doctoralthesis"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5400000214576721}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2288869608","https://openalex.org/W2106015604","https://openalex.org/W2094107668","https://openalex.org/W2913957924","https://openalex.org/W1540895701","https://openalex.org/W3003773614","https://openalex.org/W2016909568","https://openalex.org/W2068095446","https://openalex.org/W2062417941","https://openalex.org/W1771572948","https://openalex.org/W142286180","https://openalex.org/W3016798175","https://openalex.org/W3004226020","https://openalex.org/W1999797110","https://openalex.org/W2743635764","https://openalex.org/W3160998721","https://openalex.org/W2185616559","https://openalex.org/W1600555583","https://openalex.org/W3119350346","https://openalex.org/W2791314317"],"abstract_inverted_index":{"Optical":[0],"Character":[1,404],"Recognition":[2,55],"(OCR)":[3],"on":[4,92,442,481,503,518,568,589],"historical":[5,39,341,498],"printings":[6],"is":[7,62,123,175,199],"a":[8,88,114,149,153,226,372,376,443,462,466,497],"challenging":[9],"task":[10,433],"mainly":[11],"due":[12,177],"to":[13,80,101,178,248,273,285,295,298,344,431,484,509,533,548],"the":[14,17,20,26,36,81,93,172,179,182,205,215,240,253,287,292,299,350,396,402,424,432,477,485,515,539,543,550,564,582],"complexity":[15],"of":[16,38,84,155,263,289,291,311,320,352,398,434,445,468,488,577],"layout":[18,49],"and":[19,51,57,144,294,358,367,379,392,426,471,474,555],"highly":[21],"variant":[22],"typography.":[23],"Nevertheless,":[24],"in":[25,35,42,75,87,204,214,223,239,245,349,461,507],"last":[27],"few":[28],"years":[29],"great":[30,556],"progress":[31],"has":[32],"been":[33],"made":[34],"area":[37],"OCR":[40,135,142],"resulting":[41,460],"several":[43,85,346],"powerful":[44],"open-source":[45,134,470],"tools":[46,86,96,323],"for":[47,116,187,304,314,324,340,356],"preprocessing,":[48],"analysis":[50],"segmentation,":[52],"Automatic":[53],"Text":[54],"(ATR)":[56],"postcorrection.":[58],"Their":[59],"major":[60],"drawback":[61],"that":[63,181,233,389,500,573],"they":[64],"only":[65,238,535],"offer":[66],"limited":[67],"applicability":[68],"by":[69,131,171,328,408,451,494,542],"non-technical":[70],"users":[71,525],"like":[72,332],"humanist":[73],"scholars,":[74],"particular":[76],"when":[77],"it":[78],"comes":[79],"combined":[82],"use":[83],"workflow.":[89,151],"Furthermore,":[90,563],"depending":[91],"material,":[94],"these":[95,129],"are":[97,283,360,385,429,440,598],"usually":[98],"not":[99,125,200,237,534],"able":[100,532],"fully":[102,161,275,565],"automatically":[103],"achieve":[104],"sufficiently":[105],"low":[106],"error":[107,235,250],"rates,":[108],"let":[109],"alone":[110],"perfect":[111],"results,":[112],"creating":[113],"demand":[115],"an":[117,133,315,364,380,458],"interactive":[118],"postcorrection":[119],"functionality":[120],"which,":[121],"however,":[122],"generally":[124],"incorporated.\r\nThis":[126],"thesis":[127],"addresses":[128],"issues":[130],"presenting":[132],"software":[136],"called":[137],"OCR4all":[138,218,312,574],"which":[139,269,439],"combines":[140],"state-of-the-art":[141,584],"components":[143,327],"continuous":[145],"model":[146],"training":[147,188,267,357],"into":[148],"comprehensive":[150],"While":[152],"variety":[154,444,467],"materials":[156],"can":[157,270,579],"already":[158,244],"be":[159,271],"processed":[160],"automatically,":[162],"books":[163,523],"with":[164,211,225,526,538,552],"more":[165,409],"complex":[166,512,520,544],"layouts":[167,591],"require":[168],"manual":[169,258],"intervention":[170],"users.":[173],"This":[174,448],"mostly":[176],"fact":[180],"required":[183],"Ground":[184],"Truth":[185],"(GT)":[186],"stronger":[189],"mixed":[190,437,595],"models":[191,438,597],"(for":[192],"segmentation":[193],"as":[194,196,457],"well":[195],"text":[197,483,551],"recognition)":[198],"available,":[201],"yet,":[202],"neither":[203],"desired":[206],"quantity":[207],"nor":[208],"quality.\r\nTo":[209],"deal":[210,537],"this":[212,256],"issue":[213],"short":[216],"run,":[217],"offers":[219],"better":[220],"recognition":[221,359,490],"capabilities":[222,282],"combination":[224],"very":[227,519],"comfortable":[228],"Graphical":[229],"User":[230],"Interface":[231],"(GUI)":[232],"allows":[234,313],"corrections":[236],"final":[241],"output,":[242],"but":[243,546],"early":[246,521],"stages":[247],"minimize":[249],"propagation.":[251],"In":[252],"long":[254],"run":[255],"constant":[257],"correction":[259],"produces":[260],"large":[261],"quantities":[262],"valuable,":[264],"high":[265],"quality":[266],"material":[268],"used":[272],"improve":[274],"automatic":[276],"approaches.":[277],"Further":[278],"on,":[279],"extensive":[280],"configuration":[281],"provided":[284],"set":[286],"degree":[288],"automation":[290,339],"workflow":[293],"make":[296],"adaptations":[297],"carefully":[300],"selected":[301],"default":[302],"parameters":[303],"specific":[305],"printings,":[306],"if":[307,592],"necessary.":[308],"The":[309],"architecture":[310],"easy":[316],"integration":[317],"(or":[318],"substitution)":[319],"newly":[321],"developed":[322],"its":[325,511],"main":[326,516],"supporting":[329],"standardized":[330],"interfaces":[331],"PageXML,":[333],"thus":[334],"aiming":[335],"at":[336],"continual":[337],"higher":[338],"printings.\r\nIn":[342],"addition":[343],"OCR4all,":[345],"methodical":[347],"extensions":[348,413],"form":[351],"accuracy":[353],"improving":[354],"techniques":[355,428],"presented.":[361],"Most":[362],"notably":[363],"effective,":[365],"sophisticated,":[366],"adaptable":[368],"voting":[369,393,425],"methodology":[370],"using":[371,452],"single":[373],"ATR":[374,480,596],"engine,":[375],"pretraining":[377,391,427],"procedure,":[378],"Active":[381],"Learning":[382],"(AL)":[383],"component":[384],"proposed.":[386],"Experiments":[387],"showed":[388,572],"combining":[390],"significantly":[394],"improves":[395],"effectiveness":[397],"book-specific":[399],"training,":[400],"reducing":[401],"obtained":[403],"Error":[405],"Rates":[406],"(CERs)":[407],"than":[410],"50%.\r\nThe":[411],"proposed":[412],"were":[414,531],"further":[415],"evaluated":[416],"during":[417],"two":[418],"real":[419],"world":[420],"case":[421],"studies:":[422],"First,":[423],"transferred":[430],"constructing":[435],"so-called":[436],"trained":[441],"different":[446,504],"fonts.":[447],"was":[449,491],"done":[450],"19th":[453,569],"century":[454,570],"Fraktur":[455],"script":[456],"example,":[459],"considerable":[463],"improvement":[464],"over":[465],"existing":[469],"commercial":[472,583],"engines":[473],"models.":[475],"Second,":[476],"extension":[478],"from":[479],"raw":[482],"adjacent":[486],"topic":[487],"typography":[489],"successfully":[492],"addressed":[493],"thoroughly":[495],"indexing":[496],"lexicon":[499],"heavily":[501],"relies":[502],"font":[505],"types":[506],"order":[508],"encode":[510],"semantic":[513],"structure.\r\nDuring":[514],"experiments":[517],"printed":[522],"even":[524],"minimal":[527],"or":[528],"no":[529],"experience":[530],"comfortably":[536],"challenges":[540],"presented":[541],"layout,":[545],"also":[547],"recognize":[549],"manageable":[553],"effort":[554],"quality,":[557],"achieving":[558],"excellent":[559],"CERs":[560],"below":[561],"0.5%.":[562],"automated":[566],"application":[567],"novels":[571],"(average":[575],"CER":[576],"0.85%)":[578],"considerably":[580],"outperform":[581],"tool":[585],"ABBYY":[586],"Finereader":[587],"(5.3%)":[588],"moderate":[590],"suitably":[593],"pretrained":[594],"available.":[599]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
