{"id":"https://openalex.org/W4409763391","doi":"https://doi.org/10.1109/csicc65765.2025.10967472","title":"Comparative Analysis of Large Language Models for OCR Post-Processing in Persian: From ParsBERT to GPT","display_name":"Comparative Analysis of Large Language Models for OCR Post-Processing in Persian: From ParsBERT to GPT","publication_year":2025,"publication_date":"2025-02-05","ids":{"openalex":"https://openalex.org/W4409763391","doi":"https://doi.org/10.1109/csicc65765.2025.10967472"},"language":"en","primary_location":{"id":"doi:10.1109/csicc65765.2025.10967472","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csicc65765.2025.10967472","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 29th International Computer Conference, Computer Society of Iran (CSICC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073746598","display_name":"Fatemeh Valizadeh","orcid":"https://orcid.org/0000-0001-8011-2560"},"institutions":[{"id":"https://openalex.org/I115566878","display_name":"Shahid Bahonar University of Kerman","ror":"https://ror.org/04zn42r77","country_code":"IR","type":"education","lineage":["https://openalex.org/I115566878"]}],"countries":["IR"],"is_corresponding":true,"raw_author_name":"Fatemeh Valizadeh","raw_affiliation_strings":["Shahid Bahonar University of Kerman,Department of Computer Engineering,Kerman,Iran"],"affiliations":[{"raw_affiliation_string":"Shahid Bahonar University of Kerman,Department of Computer Engineering,Kerman,Iran","institution_ids":["https://openalex.org/I115566878"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036529918","display_name":"Fahimeh Ghasemian","orcid":"https://orcid.org/0000-0002-2176-7089"},"institutions":[{"id":"https://openalex.org/I115566878","display_name":"Shahid Bahonar University of Kerman","ror":"https://ror.org/04zn42r77","country_code":"IR","type":"education","lineage":["https://openalex.org/I115566878"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Fahimeh Ghasemian","raw_affiliation_strings":["Shahid Bahonar University of Kerman,Department of Computer Engineering,Kerman,Iran"],"affiliations":[{"raw_affiliation_string":"Shahid Bahonar University of Kerman,Department of Computer Engineering,Kerman,Iran","institution_ids":["https://openalex.org/I115566878"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072534068","display_name":"Elham Shabaninia","orcid":"https://orcid.org/0000-0002-1357-8288"},"institutions":[{"id":"https://openalex.org/I183263030","display_name":"Graduate University of Advanced Technology","ror":"https://ror.org/0451xdy64","country_code":"IR","type":"education","lineage":["https://openalex.org/I183263030"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Elham Shabaninia","raw_affiliation_strings":["Graduate University of Advanced Technology,Department of Applied Mathematics,Kerman,Iran"],"affiliations":[{"raw_affiliation_string":"Graduate University of Advanced Technology,Department of Applied Mathematics,Kerman,Iran","institution_ids":["https://openalex.org/I183263030"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5073746598"],"corresponding_institution_ids":["https://openalex.org/I115566878"],"apc_list":null,"apc_paid":null,"fwci":2.8599,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90491988,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9510999917984009,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9510999917984009,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9491000175476074,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/persian","display_name":"Persian","score":0.8251684904098511},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6935191750526428},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6497175097465515},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48561611771583557},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3833080530166626},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2884238362312317}],"concepts":[{"id":"https://openalex.org/C2776527531","wikidata":"https://www.wikidata.org/wiki/Q9168","display_name":"Persian","level":2,"score":0.8251684904098511},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6935191750526428},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6497175097465515},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48561611771583557},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3833080530166626},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2884238362312317},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/csicc65765.2025.10967472","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csicc65765.2025.10967472","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 29th International Computer Conference, Computer Society of Iran (CSICC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7599999904632568,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2014956166","https://openalex.org/W2071003614","https://openalex.org/W2111930270","https://openalex.org/W2150429754","https://openalex.org/W2805313330","https://openalex.org/W3032299950","https://openalex.org/W3039590646","https://openalex.org/W3090694317","https://openalex.org/W3178709966","https://openalex.org/W4385245566","https://openalex.org/W4388700798","https://openalex.org/W6607985903","https://openalex.org/W6636915900","https://openalex.org/W6729879076"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4232962587","https://openalex.org/W2339787954","https://openalex.org/W2258261728","https://openalex.org/W2330233494","https://openalex.org/W1501405543","https://openalex.org/W2890674960","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Optical":[0],"Character":[1],"Recognition":[2],"(OCR)":[3],"refers":[4],"to":[5,22],"the":[6,35,42,46,63,66,79,131,148,152],"automatic":[7],"identification":[8],"of":[9,37,45,62,74,81,112,135],"text":[10],"in":[11,34,50,72],"images":[12],"and":[13,18,31,60,88,115,125,133,166,174],"its":[14,23],"conversion":[15],"into":[16,144],"searchable":[17],"editable":[19],"formats.":[20],"Due":[21],"extensive":[24],"applications,":[25],"OCR":[26,51,91],"is":[27,103],"considered":[28],"a":[29],"crucial":[30],"challenging":[32],"topic":[33],"field":[36],"computer":[38],"vision.":[39],"In":[40],"Persian,":[41],"unique":[43],"characteristics":[44],"script":[47],"often":[48],"result":[49],"outputs":[52],"with":[53,100],"significant":[54],"errors,":[55],"which":[56],"can":[57],"compromise":[58],"readability":[59],"comprehension":[61],"content,":[64],"emphasizing":[65],"need":[67],"for":[68],"error":[69,164],"correction,":[70],"particularly":[71],"terms":[73],"spelling.":[75],"This":[76],"study":[77],"investigates":[78],"performance":[80],"four":[82],"large":[83],"language":[84],"models-ParsBERT,":[85],"LLaMA,":[86],"Mistral,":[87],"GPT-for":[89],"enhancing":[90],"outputs.":[92],"Additionally,":[93],"an":[94],"innovative":[95],"approach":[96],"that":[97,151],"integrates":[98],"ParsBERT":[99],"other":[101],"models":[102,106,158],"introduced.":[104],"These":[105],"were":[107],"evaluated":[108],"on":[109],"three":[110],"corpora":[111],"varying":[113],"sizes":[114],"complexities":[116],"using":[117],"diverse":[118],"evaluation":[119],"metrics,":[120],"including":[121],"precision,":[122,172],"accuracy,":[123,173],"recall,":[124],"others.":[126],"Our":[127],"comparative":[128],"analysis":[129],"highlights":[130],"strengths":[132],"weaknesses":[134],"each":[136],"model":[137],"across":[138,159],"different":[139],"scenarios,":[140],"providing":[141],"deeper":[142],"insights":[143],"their":[145],"performance.":[146],"Furthermore,":[147],"results":[149],"demonstrate":[150],"proposed":[153],"hybrid":[154],"approaches":[155],"outperform":[156],"standalone":[157],"all":[160],"corpora,":[161],"significantly":[162],"reducing":[163],"rates":[165],"improving":[167],"key":[168],"metrics":[169],"such":[170],"as":[171],"recall.":[175]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
