{"id":"https://openalex.org/W7140163253","doi":"https://doi.org/10.48550/arxiv.2603.21165","title":"Many Dialects, Many Languages, One Cultural Lens: Evaluating Multilingual VLMs for Bengali Culture Understanding Across Historically Linked Languages and Regional Dialects","display_name":"Many Dialects, Many Languages, One Cultural Lens: Evaluating Multilingual VLMs for Bengali Culture Understanding Across Historically Linked Languages and Regional Dialects","publication_year":2026,"publication_date":"2026-03-22","ids":{"openalex":"https://openalex.org/W7140163253","doi":"https://doi.org/10.48550/arxiv.2603.21165"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.21165","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21165","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.21165","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Sayeedi, Nurul Labib","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sayeedi, Nurul Labib","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sayeedi, Md. Faiyaz Abdullah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sayeedi, Md. Faiyaz Abdullah","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Dipta, Shubhashis Roy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dipta, Shubhashis Roy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tabassum, Rubaya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tabassum, Rubaya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hridoy, Ariful Ekraj","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hridoy, Ariful Ekraj","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Mahmood, Mehraj","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahmood, Mehraj","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sobhani, Mahbub E","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sobhani, Mahbub E","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hasan, Md. Tarek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hasan, Md. Tarek","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Shatabda, Swakkhar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shatabda, Swakkhar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9541000127792358,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9541000127792358,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.005400000140070915,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.004699999932199717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.9740999937057495},{"id":"https://openalex.org/keywords/hindi","display_name":"Hindi","score":0.6151000261306763},{"id":"https://openalex.org/keywords/meaning","display_name":"Meaning (existential)","score":0.5098999738693237},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4205999970436096},{"id":"https://openalex.org/keywords/cultural-diversity","display_name":"Cultural diversity","score":0.353300005197525},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.3452000021934509}],"concepts":[{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.9740999937057495},{"id":"https://openalex.org/C519982507","wikidata":"https://www.wikidata.org/wiki/Q1568","display_name":"Hindi","level":2,"score":0.6151000261306763},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5960999727249146},{"id":"https://openalex.org/C2780876879","wikidata":"https://www.wikidata.org/wiki/Q3054749","display_name":"Meaning (existential)","level":2,"score":0.5098999738693237},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4726000130176544},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4514999985694885},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4205999970436096},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39989998936653137},{"id":"https://openalex.org/C125209646","wikidata":"https://www.wikidata.org/wiki/Q1338878","display_name":"Cultural diversity","level":2,"score":0.353300005197525},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.31619998812675476},{"id":"https://openalex.org/C2778756302","wikidata":"https://www.wikidata.org/wiki/Q8097","display_name":"Telugu","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C2779018934","wikidata":"https://www.wikidata.org/wiki/Q1129653","display_name":"Everyday life","level":2,"score":0.2957000136375427},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.2953000068664551},{"id":"https://openalex.org/C2777350258","wikidata":"https://www.wikidata.org/wiki/Q1617","display_name":"Urdu","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.21165","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21165","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.21165","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21165","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7193199396133423,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Bangla":[0,75,87],"culture":[1,42],"is":[2,68,125],"richly":[3],"expressed":[4],"through":[5],"region,":[6],"dialect,":[7],"history,":[8],"food,":[9],"politics,":[10],"media,":[11],"and":[12,47,65,67,73,108],"everyday":[13],"visual":[14,62,131],"life,":[15],"yet":[16],"it":[17],"remains":[18],"underrepresented":[19],"in":[20],"multimodal":[21,151],"evaluation.":[22],"To":[23],"address":[24],"this":[25],"gap,":[26],"we":[27],"introduce":[28],"BanglaVerse,":[29],"a":[30,142],"culturally":[31,149],"grounded":[32,150],"benchmark":[33,60],"for":[34,98,117,147],"evaluating":[35,84],"multilingual":[36],"vision-language":[37],"models":[38],"(VLMs)":[39],"on":[40],"Bengali":[41],"across":[43,56],"historically":[44,102],"linked":[45,103],"languages":[46,72,104],"regional":[48],"dialects.":[49],"Built":[50],"from":[51],"1,152":[52],"manually":[53],"curated":[54],"images":[55],"nine":[57],"domains,":[58,121],"the":[59,122],"supports":[61],"question":[63],"answering":[64],"captioning,":[66],"expanded":[69],"into":[70],"four":[71],"five":[74],"dialects,":[76],"yielding":[77],"~32.3K":[78],"artifacts.":[79],"Our":[80],"experiments":[81],"show":[82],"that":[83],"only":[85],"standard":[86],"overestimates":[88],"true":[89],"model":[90],"capability:":[91],"performance":[92],"drops":[93],"under":[94,153],"dialectal":[95],"variation,":[96],"especially":[97],"caption":[99],"generation,":[100],"while":[101],"such":[105],"as":[106,141],"Hindi":[107],"Urdu":[109],"retain":[110],"some":[111],"cultural":[112,127],"meaning":[113],"but":[114],"remain":[115],"weaker":[116],"structured":[118],"reasoning.":[119],"Across":[120],"main":[123],"bottleneck":[124],"missing":[126],"knowledge":[128],"rather":[129],"than":[130],"grounding":[132],"alone,":[133],"with":[134],"knowledge-intensive":[135],"categories.":[136],"These":[137],"findings":[138],"position":[139],"BanglaVerse":[140],"more":[143],"realistic":[144],"test":[145],"bed":[146],"measuring":[148],"understanding":[152],"linguistic":[154],"variation.":[155]},"counts_by_year":[],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2026-03-25T00:00:00"}
