{"id":"https://openalex.org/W7151452899","doi":"https://doi.org/10.48550/arxiv.2604.03395","title":"Are Arabic Benchmarks Reliable? QIMMA's Quality-First Approach to LLM Evaluation","display_name":"Are Arabic Benchmarks Reliable? QIMMA's Quality-First Approach to LLM Evaluation","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7151452899","doi":"https://doi.org/10.48550/arxiv.2604.03395"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03395","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03395","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03395","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133123979","display_name":"Leen AlQadi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"AlQadi, Leen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114285257","display_name":"Ahmed Alzubaidi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alzubaidi, Ahmed","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133077499","display_name":"Mohammed Alyafeai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alyafeai, Mohammed","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092093417","display_name":"Hamza Alobeidli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alobeidli, Hamza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006369216","display_name":"Maitha Alhammadi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alhammadi, Maitha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120034063","display_name":"Shaikha Alsuwaidi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alsuwaidi, Shaikha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133134337","display_name":"Omar Alkaabi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alkaabi, Omar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133112482","display_name":"Basma El Amel Boussaha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boussaha, Basma El Amel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133125629","display_name":"Hakim Hacid","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hacid, Hakim","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.6621000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.6621000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.09769999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08370000123977661,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.6525999903678894},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6208000183105469},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5701000094413757},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.536300003528595},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4717999994754791},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.44359999895095825}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7192999720573425},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.6525999903678894},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6208000183105469},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6158999800682068},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5701000094413757},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5371999740600586},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.536300003528595},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4717999994754791},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.44359999895095825},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3971000015735626},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.29899999499320984},{"id":"https://openalex.org/C95318506","wikidata":"https://www.wikidata.org/wiki/Q6588467","display_name":"Textual entailment","level":3,"score":0.2962999939918518},{"id":"https://openalex.org/C198370458","wikidata":"https://www.wikidata.org/wiki/Q586459","display_name":"Type inference","level":3,"score":0.2921999990940094},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2892000079154968},{"id":"https://openalex.org/C190812933","wikidata":"https://www.wikidata.org/wiki/Q28923","display_name":"Chart","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.26260000467300415}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03395","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03395","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03395","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03395","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"QIMMA,":[2],"a":[3,24,51,93],"quality-assured":[4],"Arabic":[5,44,65,99],"LLM":[6,30],"leaderboard":[7],"that":[8],"places":[9],"systematic":[10,39],"benchmark":[11],"validation":[12],"at":[13],"its":[14],"core.":[15],"Rather":[16],"than":[17],"aggregating":[18],"existing":[19],"resources":[20],"as-is,":[21],"QIMMA":[22,92],"applies":[23],"multi-model":[25],"assessment":[26],"pipeline":[27],"combining":[28],"automated":[29],"judgment":[31],"with":[32],"human":[33],"review":[34],"to":[35],"surface":[36],"and":[37,84,95],"resolve":[38],"quality":[40],"issues":[41],"in":[42,63],"well-established":[43],"benchmarks":[45],"before":[46],"evaluation.":[47,101],"The":[48],"result":[49],"is":[50],"curated,":[52],"multi-domain,":[53],"multi-task":[54],"evaluation":[55,68],"suite":[56],"of":[57,87],"over":[58],"52k":[59],"samples,":[60],"grounded":[61],"predominantly":[62],"native":[64],"content;":[66],"code":[67],"tasks":[69],"are":[70,76],"the":[71],"sole":[72],"exception,":[73],"as":[74],"they":[75],"inherently":[77],"language-agnostic.":[78],"Transparent":[79],"implementation":[80],"via":[81],"LightEval,":[82],"EvalPlus":[83],"public":[85],"release":[86],"per-sample":[88],"inference":[89],"outputs":[90],"make":[91],"reproducible":[94],"community-extensible":[96],"foundation":[97],"for":[98],"NLP":[100]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-08T00:00:00"}
