{"id":"https://openalex.org/W4416037307","doi":"https://doi.org/10.18653/v1/2025.emnlp-demos.52","title":"GraDeT-HTR: A Resource-Efficient Bengali Handwritten Text Recognition System utilizing Grapheme-based Tokenizer and Decoder-only Transformer","display_name":"GraDeT-HTR: A Resource-Efficient Bengali Handwritten Text Recognition System utilizing Grapheme-based Tokenizer and Decoder-only Transformer","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416037307","doi":"https://doi.org/10.18653/v1/2025.emnlp-demos.52"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-demos.52","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-demos.52","pdf_url":"https://aclanthology.org/2025.emnlp-demos.52.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-demos.52.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100729764","display_name":"Mahmudul Hasan","orcid":"https://orcid.org/0009-0006-8961-7470"},"institutions":[{"id":"https://openalex.org/I184763120","display_name":"Bangladesh University","ror":"https://ror.org/03zta4r50","country_code":"BD","type":"education","lineage":["https://openalex.org/I184763120"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Md. Mahmudul Hasan","raw_affiliation_strings":["Computer Science and Engineering , University of Dhaka {mdmahmudul-2020215620 , ahmednesartahsin-2020115612 ,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering , University of Dhaka {mdmahmudul-2020215620 , ahmednesartahsin-2020115612 ,","institution_ids":["https://openalex.org/I184763120"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ahmed Nesar Tahsin Choudhury","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmed Nesar Tahsin Choudhury","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021605695","display_name":"Mahmudul Hasan","orcid":"https://orcid.org/0000-0002-6274-4107"},"institutions":[{"id":"https://openalex.org/I184763120","display_name":"Bangladesh University","ror":"https://ror.org/03zta4r50","country_code":"BD","type":"education","lineage":["https://openalex.org/I184763120"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Mahmudul Hasan","raw_affiliation_strings":["Computer Science and Engineering , University of Dhaka {mdmahmudul-2020215620 , ahmednesartahsin-2020115612 ,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering , University of Dhaka {mdmahmudul-2020215620 , ahmednesartahsin-2020115612 ,","institution_ids":["https://openalex.org/I184763120"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052283795","display_name":"Md. Mosaddek Khan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Md Mosaddek Khan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29049551,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"696","last_page":"706"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6230000257492065},{"id":"https://openalex.org/keywords/text-recognition","display_name":"Text recognition","score":0.490200012922287},{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.46619999408721924},{"id":"https://openalex.org/keywords/character-recognition","display_name":"Character recognition","score":0.3804999887943268},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.31459999084472656}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7038000226020813},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6230000257492065},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.571399986743927},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5440000295639038},{"id":"https://openalex.org/C2983812711","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text recognition","level":3,"score":0.490200012922287},{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.46619999408721924},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43560001254081726},{"id":"https://openalex.org/C2987247673","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Character recognition","level":3,"score":0.3804999887943268},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.23729999363422394}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-demos.52","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-demos.52","pdf_url":"https://aclanthology.org/2025.emnlp-demos.52.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-demos.52","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-demos.52","pdf_url":"https://aclanthology.org/2025.emnlp-demos.52.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320323933","display_name":"University of Dhaka","ror":"https://ror.org/05wv2vq37"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416037307.pdf","grobid_xml":"https://content.openalex.org/works/W4416037307.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"Bengali":[1,17,23,49,66],"being":[2],"the":[3,9,62,70],"sixth":[4],"most":[5],"spoken":[6],"language":[7],"in":[8],"world,":[10],"handwritten":[11,50],"text":[12,51],"recognition":[13,52,88],"(HTR)":[14],"systems":[15],"for":[16],"remain":[18],"severely":[19],"underdeveloped.The":[20],"complexity":[21],"of":[22,35,65,72],"script-featuring":[24],"conjuncts,":[25],"diacritics,":[26],"and":[27,81,102],"highly":[28],"variable":[29],"handwriting":[30],"styles-combined":[31],"with":[32],"a":[33,47,56,73,78],"scarcity":[34],"annotated":[36],"datasets":[37],"makes":[38],"this":[39],"task":[40],"particularly":[41],"challenging.We":[42],"present":[43],"GraDeT-HTR":[44],"1":[45],",":[46],"resource-efficient":[48],"system":[53],"based":[54],"on":[55,98,104,111],"Grapheme-aware":[57],"Decoder-only":[58],"Transformer":[59],"architecture.To":[60],"address":[61],"unique":[63],"challenges":[64],"script,":[67],"we":[68],"augment":[69],"performance":[71,110],"decoder-only":[74],"transformer":[75],"by":[76],"integrating":[77],"grapheme-based":[79],"tokenizer":[80],"demonstrate":[82],"2":[83],"that":[84],"it":[85],"significantly":[86],"improves":[87],"accuracy":[89],"compared":[90],"to":[91],"conventional":[92],"subword":[93],"tokenizers.Our":[94],"model":[95],"is":[96],"pretrained":[97],"large-scale":[99],"synthetic":[100],"data":[101],"fine-tuned":[103],"real":[105],"humanannotated":[106],"samples,":[107],"achieving":[108],"state-of-the-art":[109],"multiple":[112],"benchmark":[113],"datasets.":[114]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-08T00:00:00"}
