{"id":"https://openalex.org/W7128721629","doi":"https://doi.org/10.48550/arxiv.2602.10732","title":"Macaron: Controlled, Human-Written Benchmark for Multilingual and Multicultural Reasoning via Template-Filling","display_name":"Macaron: Controlled, Human-Written Benchmark for Multilingual and Multicultural Reasoning via Template-Filling","publication_year":2026,"publication_date":"2026-02-11","ids":{"openalex":"https://openalex.org/W7128721629","doi":"https://doi.org/10.48550/arxiv.2602.10732"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.10732","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093311278","display_name":"Alaa Elsetohy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elsetohy, Alaa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093311277","display_name":"Sama Hadhoud","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hadhoud, Sama","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019999768","display_name":"Haryo Akbarianto Wibowo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wibowo, Haryo Akbarianto","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125733289","display_name":"Chenxi Whitehouse","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Whitehouse, Chenxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125704182","display_name":"Genta Indra Winata","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Winata, Genta Indra","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125701292","display_name":"Fajri Koto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koto, Fajri","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125694000","display_name":"Alham Fikri Aji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aji, Alham Fikri","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6247000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6247000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.11789999902248383,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.037300001829862595,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6984000205993652},{"id":"https://openalex.org/keywords/multilingualism","display_name":"Multilingualism","score":0.3889000117778778},{"id":"https://openalex.org/keywords/cover","display_name":"Cover (algebra)","score":0.3765999972820282},{"id":"https://openalex.org/keywords/template","display_name":"Template","score":0.351500004529953},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.34439998865127563},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.29179999232292175}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7210999727249146},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6984000205993652},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5655999779701233},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5544000267982483},{"id":"https://openalex.org/C2780035574","wikidata":"https://www.wikidata.org/wiki/Q30081","display_name":"Multilingualism","level":2,"score":0.3889000117778778},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.3765999972820282},{"id":"https://openalex.org/C82714645","wikidata":"https://www.wikidata.org/wiki/Q438331","display_name":"Template","level":2,"score":0.351500004529953},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.34439998865127563},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C542530943","wikidata":"https://www.wikidata.org/wiki/Q190656","display_name":"Multiculturalism","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2623000144958496},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C2778883600","wikidata":"https://www.wikidata.org/wiki/Q2390977","display_name":"Language proficiency","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.10732","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.10732","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.10732","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.10732","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.761748731136322,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multilingual":[0],"benchmarks":[1],"rarely":[2],"test":[3],"reasoning":[4,22,32,47],"over":[5,20],"culturally":[6],"grounded":[7],"premises:":[8],"translated":[9],"datasets":[10,16],"keep":[11],"English-centric":[12],"scenarios,":[13],"while":[14,115],"culture-first":[15],"often":[17,124],"lack":[18],"control":[19],"the":[21,103,137],"required.":[23],"We":[24],"propose":[25],"Macaron,":[26],"a":[27],"template-first":[28],"benchmark":[29],"that":[30,44],"factorizes":[31],"type":[33],"and":[34,57,61,76,79,89,108,112,123,132],"cultural":[35,50],"aspect":[36],"across":[37],"question":[38],"languages.":[39],"Using":[40],"100":[41],"language-agnostic":[42],"templates":[43,134],"cover":[45],"7":[46],"types,":[48],"22":[49],"aspects,":[51],"native":[52],"annotators":[53],"create":[54],"scenario-aligned":[55],"English":[56,111],"local-language":[58],"multiple-choice":[59],"questions,":[60],"systematically":[62],"derived":[63],"True/False":[64],"questions.":[65],"Macaron":[66],"contains":[67],"11,862":[68],"instances":[69],"spanning":[70],"20":[71,77],"countries/cultural":[72],"contexts,":[73],"10":[74],"scripts,":[75],"languages":[78,122],"dialects":[80],"(including":[81],"low-resource":[82],"ones":[83],"like":[84],"Amharic,":[85],"Yoruba,":[86],"Zulu,":[87],"Kyrgyz,":[88],"some":[90],"Arabic":[91],"dialects).":[92],"In":[93],"zero-shot":[94],"evaluation":[95],"of":[96],"21":[97],"multilingual":[98],"LLMs,":[99],"reasoning-mode":[100],"models":[101,117],"achieve":[102],"strongest":[104],"performance":[105],"(80.8%":[106],"overall)":[107],"near-parity":[109],"between":[110],"local":[113,121],"languages,":[114],"open-weight":[116],"degrade":[118],"substantially":[119],"in":[120],"approach":[125],"chance":[126],"on":[127],"T/F":[128],"tasks.":[129],"Culture-grounded":[130],"mathematical":[131],"counting":[133],"are":[135],"consistently":[136],"hardest.":[138],"The":[139],"data":[140],"can":[141],"be":[142],"accessed":[143],"here":[144],"https://huggingface.co/datasets/AlaaAhmed2444/Macaron.":[145]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-13T00:00:00"}
