{"id":"https://openalex.org/W7138299327","doi":"https://doi.org/10.1609/aaai.v40i11.37829","title":"MME-SCI: A Comprehensive and Challenging Science Benchmark for Multimodal Large Language Models","display_name":"MME-SCI: A Comprehensive and Challenging Science Benchmark for Multimodal Large Language Models","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138299327","doi":"https://doi.org/10.1609/aaai.v40i11.37829"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i11.37829","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37829","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i11.37829","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129699773","display_name":"Jiacheng Ruan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jiacheng Ruan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129682824","display_name":"Dan Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dan Jiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129673104","display_name":"Xian Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xian Gao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129653034","display_name":"Ting Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ting Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129722121","display_name":"Yuzhuo Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuzhuo Fu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129643236","display_name":"Yangyang Kang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yangyang Kang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5129699773"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.51043339,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"11","first_page":"8760","last_page":"8768"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.295199990272522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.295199990272522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.2143000066280365,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.09769999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.541100025177002},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.531499981880188},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4952000081539154},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.4693000018596649},{"id":"https://openalex.org/keywords/strengths-and-weaknesses","display_name":"Strengths and weaknesses","score":0.44020000100135803},{"id":"https://openalex.org/keywords/cover","display_name":"Cover (algebra)","score":0.3937999904155731},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.3628000020980835},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.3447999954223633}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7599999904632568},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.541100025177002},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.531499981880188},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4952000081539154},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4887999892234802},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48590001463890076},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.4693000018596649},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.44020000100135803},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.3937999904155731},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.3628000020980835},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3447999954223633},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32499998807907104},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3102000057697296},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3084999918937683},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C2992562121","wikidata":"https://www.wikidata.org/wiki/Q3817808","display_name":"Scientific reasoning","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i11.37829","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37829","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i11.37829","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37829","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8702493906021118,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"multimodal":[1],"large":[2],"language":[3],"models":[4,131],"(MLLMs)":[5],"have":[6,17,31],"achieved":[7,156],"significant":[8],"advancements":[9],"across":[10],"various":[11],"domains,":[12],"and":[13,21,87,112,114,122,132,136,163,169,187,198],"corresponding":[14],"evaluation":[15,53,101,153],"benchmarks":[16,26,45],"been":[18],"continuously":[19],"refined":[20],"improved.":[22],"In":[23],"this":[24],"process,":[25],"in":[27,36,58,165,196,202,207],"the":[28,38,137,151,225,231,236],"scientific":[29,75,237],"domain":[30],"played":[32],"an":[33],"important":[34],"role":[35],"assessing":[37],"reasoning":[39,56,238],"capabilities":[40,239],"of":[41,54,64,71,74,158,219,240],"MLLMs.":[42,147,241],"However,":[43],"existing":[44,146,180,193],"still":[46],"face":[47],"three":[48],"key":[49],"challenges:":[50],"1)":[51],"Insufficient":[52],"models'":[55,194],"abilities":[57],"multilingual":[59,186],"scenarios;":[60],"2)":[61],"Inadequate":[62],"assessment":[63],"MLLMs'":[65],"comprehensive":[66,86],"modality":[67],"coverage;":[68],"3)":[69],"Lack":[70],"fine-grained":[72,188],"annotation":[73],"knowledge":[76,189],"points.":[77],"To":[78],"address":[79],"these":[80],"gaps,":[81],"we":[82,191],"propose":[83],"MME-SCI,":[84],"a":[85,173],"challenging":[88,144],"benchmark.":[89],"We":[90,124],"carefully":[91],"collected":[92],"1,019":[93],"high-quality":[94],"question-answer":[95],"pairs,":[96],"which":[97],"involve":[98],"3":[99],"distinct":[100],"modes.":[102],"These":[103,228],"pairs":[104],"cover":[105],"four":[106],"subjects,":[107],"namely":[108],"mathematics,":[109,166],"physics,":[110,167],"chemistry,":[111,168],"biology,":[113,170],"support":[115],"five":[116],"languages:":[117],"Chinese,":[118],"English,":[119],"French,":[120],"Spanish,":[121],"Japanese.":[123],"conducted":[125],"extensive":[126],"experiments":[127],"on":[128],"16":[129],"open-source":[130],"4":[133],"closed-source":[134],"models,":[135],"results":[138],"demonstrate":[139],"that":[140],"MME-SCI":[141],"is":[142],"widely":[143],"for":[145],"For":[148,205],"instance,":[149],"under":[150],"Image-only":[152],"mode,":[154],"o4-mini":[155,213],"accuracy":[157],"only":[159,216],"52.11%,":[160],"24.73%,":[161],"36.57%,":[162],"29.80%":[164],"respectively,":[171],"indicating":[172],"significantly":[174],"higher":[175],"difficulty":[176],"level":[177],"compared":[178],"to":[179,210,234],"benchmarks.":[181],"More":[182],"importantly,":[183],"using":[184],"MME-SCI's":[185],"attributes,":[190],"analyzed":[192],"performance":[195],"depth":[197],"identified":[199],"their":[200],"weaknesses":[201],"specific":[203],"domains.":[204],"example,":[206],"questions":[208],"related":[209],"\"Magnetic":[211],"Field\",":[212],"correctly":[214],"answered":[215],"5":[217],"out":[218],"33":[220],"questions,":[221],"thereby":[222],"fine-grainedly":[223],"exposing":[224],"model's":[226],"vulnerabilities.":[227],"findings":[229],"highlight":[230],"urgent":[232],"need":[233],"enhance":[235]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
