{"id":"https://openalex.org/W4416245729","doi":"https://doi.org/10.48550/arxiv.2510.18898","title":"Transformer-Based Low-Resource Language Translation: A Study on Standard Bengali to Sylheti","display_name":"Transformer-Based Low-Resource Language Translation: A Study on Standard Bengali to Sylheti","publication_year":2025,"publication_date":"2025-10-20","ids":{"openalex":"https://openalex.org/W4416245729","doi":"https://doi.org/10.48550/arxiv.2510.18898"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2510.18898","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.18898","pdf_url":"https://arxiv.org/pdf/2510.18898","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.18898","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116052394","display_name":"Mangsura Kabir Oni","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Oni, Mangsura Kabir","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5038520407","display_name":"Tabia Tanzin Prama","orcid":"https://orcid.org/0009-0008-6234-3101"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prama, Tabia Tanzin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5116052394"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.6643999814987183,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.6643999814987183,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07440000027418137,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.0364999994635582,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.7103000283241272},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.6283000111579895},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5336999893188477},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3946000039577484},{"id":"https://openalex.org/keywords/statistical-analysis","display_name":"Statistical analysis","score":0.31859999895095825},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.31610000133514404}],"concepts":[{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.7103000283241272},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.6283000111579895},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6193000078201294},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5774999856948853},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5770999789237976},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5336999893188477},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.49939998984336853},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3946000039577484},{"id":"https://openalex.org/C2986587452","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical analysis","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.31610000133514404},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C135784402","wikidata":"https://www.wikidata.org/wiki/Q6958279","display_name":"Evaluation of machine translation","level":5,"score":0.2669999897480011},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.25209999084472656}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2510.18898","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.18898","pdf_url":"https://arxiv.org/pdf/2510.18898","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.18898","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.18898","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.18898","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.18898","pdf_url":"https://arxiv.org/pdf/2510.18898","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Machine":[0],"Translation":[1],"(MT)":[2],"has":[3],"advanced":[4],"from":[5],"rule-based":[6],"and":[7,47,72,90],"statistical":[8],"methods":[9,20],"to":[10,92],"neural":[11],"approaches":[12],"based":[13],"on":[14],"the":[15,68,75,82],"Transformer":[16,45],"architecture.":[17],"While":[18],"these":[19],"have":[21],"achieved":[22],"impressive":[23],"results":[24,57],"for":[25,87],"high-resource":[26],"languages,":[27],"low-resource":[28],"varieties":[29],"such":[30],"as":[31],"Sylheti":[32],"remain":[33],"underexplored.":[34],"In":[35],"this":[36],"work,":[37],"we":[38],"investigate":[39],"Bengali-to-Sylheti":[40],"translation":[41,70],"by":[42],"fine-tuning":[43],"multilingual":[44],"models":[46,54,61],"comparing":[48],"them":[49],"with":[50,65],"zero-shot":[51],"large":[52],"language":[53,97],"(LLMs).":[55],"Experimental":[56],"demonstrate":[58],"that":[59],"fine-tuned":[60],"significantly":[62],"outperform":[63],"LLMs,":[64],"mBART-50":[66],"achieving":[67],"highest":[69],"adequacy":[71],"MarianMT":[73],"showing":[74],"strongest":[76],"character-level":[77],"fidelity.":[78],"These":[79],"findings":[80],"highlight":[81],"importance":[83],"of":[84],"task-specific":[85],"adaptation":[86],"underrepresented":[88],"languages":[89],"contribute":[91],"ongoing":[93],"efforts":[94],"toward":[95],"inclusive":[96],"technologies.":[98]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
