{"id":"https://openalex.org/W7151553970","doi":"https://doi.org/10.1109/icmla66185.2025.00025","title":"Optical Character Recognition for Pre-Digital Historical Documents using Large Language Models","display_name":"Optical Character Recognition for Pre-Digital Historical Documents using Large Language Models","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W7151553970","doi":"https://doi.org/10.1109/icmla66185.2025.00025"},"language":null,"primary_location":{"id":"doi:10.1109/icmla66185.2025.00025","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icmla66185.2025.00025","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Machine Learning and Applications (ICMLA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049327871","display_name":"Chreston Miller","orcid":"https://orcid.org/0000-0003-4276-0537"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chreston Miller","raw_affiliation_strings":["University Libraries Virginia Tech,Blacksburg,USA"],"affiliations":[{"raw_affiliation_string":"University Libraries Virginia Tech,Blacksburg,USA","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085592027","display_name":"Bipasha Banerjee","orcid":"https://orcid.org/0000-0003-4472-1902"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bipasha Banerjee","raw_affiliation_strings":["University Libraries Virginia Tech,Blacksburg,USA"],"affiliations":[{"raw_affiliation_string":"University Libraries Virginia Tech,Blacksburg,USA","institution_ids":["https://openalex.org/I859038795"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5049327871"],"corresponding_institution_ids":["https://openalex.org/I859038795"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.74931241,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"144","last_page":"151"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9059000015258789,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9059000015258789,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.00930000003427267,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.004800000227987766,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4302999973297119},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.41670000553131104},{"id":"https://openalex.org/keywords/character-recognition","display_name":"Character recognition","score":0.3864000141620636},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3495999872684479},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.33869999647140503},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.33489999175071716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6934999823570251},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5764999985694885},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5691999793052673},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4302999973297119},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.41670000553131104},{"id":"https://openalex.org/C2987247673","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Character recognition","level":3,"score":0.3864000141620636},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3806000053882599},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3495999872684479},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.33489999175071716},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.31850001215934753},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.28790000081062317},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icmla66185.2025.00025","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icmla66185.2025.00025","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Machine Learning and Applications (ICMLA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":2,"referenced_works":["https://openalex.org/W3208962647","https://openalex.org/W4387321091"],"related_works":[],"abstract_inverted_index":{"Multi-modal":[0],"large":[1,234],"language":[2],"models":[3],"(LLMs)":[4],"with":[5,117,150,182,264,275],"vision":[6,265],"capabilities":[7,114],"have":[8],"been":[9],"shown":[10],"to":[11,18,37,49,73,96,146,148,195,266],"be":[12],"a":[13,97,124,138,157],"promising":[14],"and":[15,64,91,107,142,189,216],"effective":[16],"approach":[17],"extracting":[19],"text":[20,29,112],"from":[21,166,241],"images":[22],"or":[23,62,67],"optical":[24],"character":[25,207],"recognition":[26],"(OCR).":[27],"However,":[28],"extraction":[30,113],"using":[31,205],"OCR":[32,75,78,130,144,175,197,224,268],"is":[33],"challenging":[34,276],"when":[35],"applied":[36],"scanned":[38,80,132,162],"pre-digital":[39,81,133,163,272],"historical":[40,82,134,164,273],"documents.":[41,135],"Factors":[42],"such":[43],"as":[44,248],"poor":[45,68],"scan":[46,59],"quality":[47],"due":[48],"the":[50,53,101,111,167,173,206,217,231,261],"age":[51],"of":[52,57,100,115,140,239,271],"documents,":[54],"various":[55],"levels":[56],"fading,":[58],"skew":[60],"(left":[61],"right),":[63],"possible":[65],"abnormal":[66],"background-to-text":[69],"contrast":[70],"can":[71,94],"contribute":[72],"incorrect":[74],"results.":[76],"Performing":[77],"on":[79,131,198,269],"documents":[83,165,274],"makes":[84],"them":[85],"machine-readable,":[86],"thus":[87],"enabling":[88],"computational":[89],"analysis":[90],"preservation.":[92],"This":[93],"lead":[95],"better":[98,228],"understanding":[99],"past,":[102],"especially":[103],"for":[104,127,171,222,233],"significant":[105],"events":[106],"time":[108],"periods.":[109],"Given":[110],"LLMs":[116,149,181,263],"vision,":[118,183],"we":[119,155],"posit":[120],"that":[121,180],"they":[122],"are":[123,193],"viable":[125],"option":[126],"performing":[128],"robust":[129],"We":[136,203],"chose":[137],"set":[139],"foundational":[141],"capable":[143],"technologies":[145],"compare":[147],"vision.":[151],"To":[152],"accomplish":[153],"this,":[154],"curated":[156],"ground":[158],"truth":[159],"dataset":[160,200],"comprising":[161],"early":[168],"twentieth":[169],"century":[170],"comparing":[172],"chosen":[174],"technologies.":[176],"Our":[177],"experiments":[178],"showed":[179],"specifically":[184],"Mistral":[185,226],"AI\u2019s":[186,191],"Mistral-Small-3.1-24B-Instruct-2503":[187],"model":[188],"Allen":[190],"olmOCR-7B-0225-preview,":[192],"able":[194],"perform":[196,267],"our":[199,242],"very":[201],"well.":[202],"evaluated":[204],"error":[208,235],"rate":[209],"(CER),":[210],"BLEU":[211],"score,":[212],"multiple":[213],"ROUGE":[214],"scores,":[215],"Normalized":[218],"Levenshtein":[219],"Distance":[220],"(NLD)":[221],"each":[223],"technology.":[225],"had":[227],"results":[229,259],"but":[230,250],"potential":[232],"(2":[236],"cases":[237],"out":[238],"359":[240],"dataset),":[243],"while":[244],"olmOCR":[245],"performed":[246],"almost":[247],"well":[249],"was":[251],"more":[252],"consistent":[253],"in":[254],"mitigating":[255],"high":[256],"error.":[257],"These":[258],"support":[260],"use":[262],"scans":[270],"characteristics.":[277]},"counts_by_year":[],"updated_date":"2026-04-09T06:08:40.794217","created_date":"2026-04-08T00:00:00"}
