{"id":"https://openalex.org/W7131418507","doi":"https://doi.org/10.48550/arxiv.2602.20065","title":"Multilingual Large Language Models do not comprehend all natural languages to equal degrees","display_name":"Multilingual Large Language Models do not comprehend all natural languages to equal degrees","publication_year":2026,"publication_date":"2026-02-23","ids":{"openalex":"https://openalex.org/W7131418507","doi":"https://doi.org/10.48550/arxiv.2602.20065"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.20065","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.20065","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123611621","display_name":"Natalia Moskvina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moskvina, Natalia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Montero, Raquel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Montero, Raquel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126197978","display_name":"Masaya Yoshida","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoshida, Masaya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087341558","display_name":"Ferdy Hubers","orcid":"https://orcid.org/0000-0002-2298-6013"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hubers, Ferdy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123418050","display_name":"Paolo Morosi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Morosi, Paolo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126204192","display_name":"Walid Irhaymi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Irhaymi, Walid","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000444143","display_name":"Yan Jin","orcid":"https://orcid.org/0000-0001-5957-7443"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Jin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126185188","display_name":"Tamara Serrano","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Serrano, Tamara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090836370","display_name":"Elena Pagliarini","orcid":"https://orcid.org/0000-0002-8644-0984"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pagliarini, Elena","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085589914","display_name":"Fritz G\u00fcnther","orcid":"https://orcid.org/0000-0002-9205-6786"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"G\u00fcnther, Fritz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126812700","display_name":"Evelina Leivada","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leivada, Evelina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.12639999389648438,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.12639999389648438,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.10899999737739563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.09290000051259995,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.6029000282287598},{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.5659000277519226},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4912000000476837},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4657999873161316},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.3937999904155731},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.38519999384880066},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.37059998512268066},{"id":"https://openalex.org/keywords/romance-languages","display_name":"Romance languages","score":0.3506999909877777}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6958000063896179},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.6280999779701233},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.6029000282287598},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.5659000277519226},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5393999814987183},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5314000248908997},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4912000000476837},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4657999873161316},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3937999904155731},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.38519999384880066},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.37059998512268066},{"id":"https://openalex.org/C41132520","wikidata":"https://www.wikidata.org/wiki/Q19814","display_name":"Romance languages","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C116081451","wikidata":"https://www.wikidata.org/wiki/Q1501364","display_name":"Language transfer","level":4,"score":0.3384000062942505},{"id":"https://openalex.org/C79078291","wikidata":"https://www.wikidata.org/wiki/Q980142","display_name":"Linguistic universal","level":3,"score":0.3183000087738037},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C129353971","wikidata":"https://www.wikidata.org/wiki/Q5156949","display_name":"Comprehension approach","level":3,"score":0.29179999232292175},{"id":"https://openalex.org/C2993724205","wikidata":"https://www.wikidata.org/wiki/Q315","display_name":"Human language","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C59985594","wikidata":"https://www.wikidata.org/wiki/Q1758140","display_name":"Contrastive linguistics","level":3,"score":0.27889999747276306},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.2705000042915344},{"id":"https://openalex.org/C171041071","wikidata":"https://www.wikidata.org/wiki/Q36870","display_name":"First language","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C504331141","wikidata":"https://www.wikidata.org/wiki/Q59203","display_name":"Diglossia","level":3,"score":0.2547999918460846},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25130000710487366},{"id":"https://openalex.org/C2779313563","wikidata":"https://www.wikidata.org/wiki/Q17072565","display_name":"On Language","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.20065","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.20065","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7333007454872131,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"play":[4],"a":[5,91],"critical":[6],"role":[7,166],"in":[8,35,72,79,128,190],"how":[9],"humans":[10],"access":[11],"information.":[12],"While":[13],"their":[14],"core":[15],"use":[16],"relies":[17],"on":[18,90],"comprehending":[19],"written":[20],"requests,":[21],"our":[22],"understanding":[23],"of":[24,83,130,167,184],"this":[25],"ability":[26],"is":[27,52,55,142],"currently":[28],"limited,":[29],"because":[30],"most":[31],"benchmarks":[32],"evaluate":[33],"LLMs":[34],"high-resource":[36],"languages":[37,64,194],"predominantly":[38],"spoken":[39],"by":[40,152,163],"Western,":[41],"Educated,":[42],"Industrialised,":[43],"Rich,":[44],"and":[45,104,181,187,195],"Democratic":[46],"(WEIRD)":[47],"communities.":[48,199],"The":[49],"default":[50],"assumption":[51],"that":[53,111,170],"English":[54,141],"the":[56,80,99,112,144,161,165],"best-performing":[57,145],"language":[58,92,106,177],"for":[59],"LLMs,":[60,84],"while":[61],"smaller,":[62],"low-resource":[63,193],"are":[65],"linked":[66],"to":[67,133,137],"less":[68],"reliable":[69],"outputs,":[70],"even":[71,156],"multilingual,":[73],"state-of-the-art":[74],"models.":[75],"To":[76],"track":[77],"variation":[78],"comprehension":[81,93],"abilities":[82],"we":[85],"prompt":[86],"3":[87],"popular":[88],"models":[89,113],"task":[94],"across":[95,118],"12":[96],"languages,":[97,121,155],"representing":[98],"Indo-European,":[100],"Afro-Asiatic,":[101],"Turkic,":[102],"Sino-Tibetan,":[103],"Japonic":[105],"families.":[107],"Our":[108],"results":[109,162],"suggest":[110],"exhibit":[114],"remarkable":[115],"linguistic":[116],"accuracy":[117],"typologically":[119],"diverse":[120],"yet":[122],"they":[123],"fall":[124],"behind":[125],"human":[126],"baselines":[127],"all":[129],"them,":[131],"albeit":[132],"different":[134],"degrees.":[135],"Contrary":[136],"what":[138],"was":[139,149],"expected,":[140],"not":[143],"language,":[146],"as":[147,175],"it":[148],"systematically":[150],"outperformed":[151],"several":[153,168],"Romance":[154],"lower-resource":[157],"ones.":[158],"We":[159],"frame":[160],"discussing":[164],"factors":[169],"drive":[171],"LLM":[172],"performance,":[173],"such":[174],"tokenization,":[176],"distance":[178],"from":[179],"Spanish":[180],"English,":[182],"size":[183],"training":[185],"data,":[186],"data":[188],"origin":[189],"high-":[191],"vs.":[192,197],"WEIRD":[196],"non-WEIRD":[198]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-26T00:00:00"}
