{"id":"https://openalex.org/W4379793938","doi":"https://doi.org/10.1109/access.2023.3283340","title":"An Efficient Unsupervised Approach for OCR Error Correction of Vietnamese OCR Text","display_name":"An Efficient Unsupervised Approach for OCR Error Correction of Vietnamese OCR Text","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4379793938","doi":"https://doi.org/10.1109/access.2023.3283340"},"language":"en","primary_location":{"id":"doi:10.1109/access.2023.3283340","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3283340","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10144767.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10144767.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053018788","display_name":"Dung Nguyen","orcid":"https://orcid.org/0000-0003-1580-9032"},"institutions":[{"id":"https://openalex.org/I4210123993","display_name":"Van Lang University","ror":"https://ror.org/02ryrf141","country_code":"VN","type":"education","lineage":["https://openalex.org/I4210123993"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Quoc-Dung Nguyen","raw_affiliation_strings":["Faculty of Mechanical-Electrical and Computer Engineering, School of Technology, Van Lang University, Ho Chi Minh City, Vietnam"],"raw_orcid":"https://orcid.org/0000-0003-1580-9032","affiliations":[{"raw_affiliation_string":"Faculty of Mechanical-Electrical and Computer Engineering, School of Technology, Van Lang University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I4210123993"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033621300","display_name":"Nguyet-Minh Phan","orcid":null},"institutions":[{"id":"https://openalex.org/I47807185","display_name":"Saigon University","ror":"https://ror.org/01f1fsr30","country_code":"VN","type":"education","lineage":["https://openalex.org/I47807185"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Nguyet-Minh Phan","raw_affiliation_strings":["Faculty of Information Technology, Saigon University, Chi Minh City, Vietnam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Saigon University, Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I47807185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088934266","display_name":"Pavel Kr\u00f6mer","orcid":"https://orcid.org/0000-0001-8428-3332"},"institutions":[{"id":"https://openalex.org/I142208455","display_name":"VSB - Technical University of Ostrava","ror":"https://ror.org/05x8mcb75","country_code":"CZ","type":"education","lineage":["https://openalex.org/I142208455"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Pavel Kr\u00f6mer","raw_affiliation_strings":["Department of Computer Science, VSB--Technical University of Ostrava, Ostrava, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0001-8428-3332","affiliations":[{"raw_affiliation_string":"Department of Computer Science, VSB--Technical University of Ostrava, Ostrava, Czech Republic","institution_ids":["https://openalex.org/I142208455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058292518","display_name":"Anh Duc Le","orcid":"https://orcid.org/0000-0002-9359-9686"},"institutions":[{"id":"https://openalex.org/I3020192730","display_name":"Tr\u01b0\u1eddng \u0110H Nguy\u1ec5n T\u1ea5t Th\u00e0nh","ror":"https://ror.org/04r9s1v23","country_code":"VN","type":"education","lineage":["https://openalex.org/I3020192730"]},{"id":"https://openalex.org/I4210134673","display_name":"The Institute of Statistical Mathematics","ror":"https://ror.org/03jcejr58","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I4210134673","https://openalex.org/I4210158934"]}],"countries":["JP","VN"],"is_corresponding":false,"raw_author_name":"Duc-Anh Le","raw_affiliation_strings":["The Institute of Statistical Mathematics, Tokyo, Japan","NTT Hi-Tech Institute, Nguyen Tat Thanh University, Ho Chi Minh City, Vietnam"],"raw_orcid":"https://orcid.org/0000-0002-9359-9686","affiliations":[{"raw_affiliation_string":"The Institute of Statistical Mathematics, Tokyo, Japan","institution_ids":["https://openalex.org/I4210134673"]},{"raw_affiliation_string":"NTT Hi-Tech Institute, Nguyen Tat Thanh University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I3020192730"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5053018788"],"corresponding_institution_ids":["https://openalex.org/I4210123993"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":2.4087,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.90788222,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"11","issue":null,"first_page":"58406","last_page":"58421"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8409155607223511},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.735612690448761},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.7259169816970825},{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.543254017829895},{"id":"https://openalex.org/keywords/error-detection-and-correction","display_name":"Error detection and correction","score":0.5404421091079712},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4605499804019928},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.453371524810791},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4116540253162384},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3848581910133362},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.30893248319625854},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10814017057418823},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0687064528465271}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8409155607223511},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.735612690448761},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.7259169816970825},{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.543254017829895},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.5404421091079712},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4605499804019928},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.453371524810791},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4116540253162384},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3848581910133362},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.30893248319625854},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10814017057418823},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0687064528465271},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/access.2023.3283340","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3283340","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10144767.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:dspace.vsb.cz:10084/152178","is_oa":true,"landing_page_url":"http://hdl.handle.net/10084/152178","pdf_url":"http://dspace.vsb.cz/bitstream/10084/152178/1/2169-3536-2023v11p58406.pdf","source":{"id":"https://openalex.org/S4306401668","display_name":"DSpace V\u0160B-TUO (V\u0160B-TUO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I142208455","host_organization_name":"VSB - Technical University of Ostrava","host_organization_lineage":["https://openalex.org/I142208455"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},{"id":"pmh:oai:doaj.org/article:2a9f5237186b4ff39c107edce93b7d99","is_oa":true,"landing_page_url":"https://doaj.org/article/2a9f5237186b4ff39c107edce93b7d99","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 11, Pp 58406-58421 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2023.3283340","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3283340","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10144767.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G176604203","display_name":null,"funder_award_id":"02.1.01/0.0/0.0","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G291036901","display_name":null,"funder_award_id":"CZ.02","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G3490171961","display_name":null,"funder_award_id":"CZ.02.1.01","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G5916014575","display_name":null,"funder_award_id":"CZ.02.1.01/0.0/0.0","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G6594491356","display_name":null,"funder_award_id":"CZ.02.1.01/0.0/0.0/","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G6931559950","display_name":null,"funder_award_id":"CZ.02.1.01/0.0/0.0/17_049/0008425","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"}],"funders":[{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4379793938.pdf","grobid_xml":"https://content.openalex.org/works/W4379793938.grobid-xml"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W104170146","https://openalex.org/W198078190","https://openalex.org/W1573345323","https://openalex.org/W1626689694","https://openalex.org/W1771572948","https://openalex.org/W1925516709","https://openalex.org/W1967238710","https://openalex.org/W1976951044","https://openalex.org/W1977877104","https://openalex.org/W1990871427","https://openalex.org/W2000020734","https://openalex.org/W2001496424","https://openalex.org/W2011084371","https://openalex.org/W2075546307","https://openalex.org/W2094020313","https://openalex.org/W2111809775","https://openalex.org/W2122585011","https://openalex.org/W2124130165","https://openalex.org/W2124807415","https://openalex.org/W2140608987","https://openalex.org/W2156773695","https://openalex.org/W2555924170","https://openalex.org/W2575782020","https://openalex.org/W2613930770","https://openalex.org/W2756610685","https://openalex.org/W2789315469","https://openalex.org/W2791089502","https://openalex.org/W2801596382","https://openalex.org/W2809468489","https://openalex.org/W2886313391","https://openalex.org/W2901878905","https://openalex.org/W2904452147","https://openalex.org/W2906436882","https://openalex.org/W2955124706","https://openalex.org/W2963212250","https://openalex.org/W2964322605","https://openalex.org/W2967691513","https://openalex.org/W2995333594","https://openalex.org/W3015310959","https://openalex.org/W3037091812","https://openalex.org/W3046591412","https://openalex.org/W3103765461","https://openalex.org/W3109952631","https://openalex.org/W3110596726","https://openalex.org/W3201355987","https://openalex.org/W3201477092","https://openalex.org/W4230543443","https://openalex.org/W4237342689","https://openalex.org/W4285308421","https://openalex.org/W4292068860","https://openalex.org/W4302771278","https://openalex.org/W4312290170","https://openalex.org/W6678531863","https://openalex.org/W6729879076","https://openalex.org/W6732000612","https://openalex.org/W6737734239","https://openalex.org/W6748704327","https://openalex.org/W6751111766","https://openalex.org/W6779943999","https://openalex.org/W6786177090"],"related_works":["https://openalex.org/W2901286616","https://openalex.org/W3113733647","https://openalex.org/W4206924063","https://openalex.org/W2901718966","https://openalex.org/W3042674643","https://openalex.org/W3022757400","https://openalex.org/W1844055093","https://openalex.org/W61780229","https://openalex.org/W2077055222","https://openalex.org/W2575782020"],"abstract_inverted_index":{"Different":[0],"types":[1],"of":[2,15],"OCR":[3,8,22,34,40,74,105],"errors":[4,41],"often":[5],"occur":[6],"in":[7,21,46,76,114,150],"texts":[9,75],"due":[10],"to":[11,84,93,146],"the":[12,110,115,147,151],"low":[13],"quality":[14],"scanned":[16],"document":[17],"images":[18],"or":[19],"limitations":[20],"software.":[23],"In":[24],"this":[25],"paper,":[26],"we":[27],"propose":[28],"a":[29],"novel":[30],"unsupervised":[31],"approach":[32],"for":[33,39],"error":[35],"correction.":[36],"Correction":[37,59],"candidates":[38,89],"are":[42,61],"generated":[43],"and":[44,86,133],"explored":[45],"their":[47],"neighborhoods":[48],"using":[49],"correction":[50,88],"character":[51],"edits":[52],"controlled":[53],"by":[54],"an":[55,104],"adapted":[56],"hill-climbing":[57],"algorithm.":[58],"characters":[60],"extracted":[62],"from":[63,109],"only":[64],"original":[65],"ground":[66],"truth":[67],"texts,":[68],"which":[69],"do":[70],"not":[71],"depend":[72],"on":[73,103],"training":[77],"data.":[78],"A":[79],"weighted":[80],"objective":[81],"function":[82],"used":[83],"score":[85],"rank":[87],"is":[90,101,127],"heuristically":[91],"tested":[92],"find":[94],"optimal":[95],"weight":[96],"combinations.":[97],"The":[98,124,135],"proposed":[99,125],"model":[100,126,141],"evaluated":[102],"text":[106,121],"dataset":[107],"originating":[108],"Vietnamese":[111,118],"handwritten":[112,120],"database":[113],"ICFHR":[116,152],"2018":[117,153],"online":[119],"recognition":[122],"competition.":[123,154],"also":[128],"verified":[129],"concerning":[130],"its":[131],"stability":[132],"complexity.":[134],"experimental":[136],"results":[137],"show":[138],"that":[139],"our":[140],"achieves":[142],"competitive":[143],"performance":[144],"compared":[145],"other":[148],"models":[149]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":6}],"updated_date":"2026-05-14T08:36:36.166977","created_date":"2025-10-10T00:00:00"}
