{"id":"https://openalex.org/W7135223756","doi":"https://doi.org/10.48550/arxiv.2603.11414","title":"MaterialFigBENCH: benchmark dataset with figures for evaluating college-level materials science problem-solving abilities of multimodal large language models","display_name":"MaterialFigBENCH: benchmark dataset with figures for evaluating college-level materials science problem-solving abilities of multimodal large language models","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135223756","doi":"https://doi.org/10.48550/arxiv.2603.11414"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11414","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11414","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11414","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128981080","display_name":"Michiko Yoshitake","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoshitake, Michiko","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128970132","display_name":"Yuta Suzuki","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suzuki, Yuta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066135113","display_name":"Ryo Igarashi","orcid":"https://orcid.org/0000-0002-2894-7226"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Igarashi, Ryo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129092253","display_name":"Yoshitaka Ushiku","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ushiku, Yoshitaka","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5035168194","display_name":"Keisuke Nagato","orcid":"https://orcid.org/0000-0003-2399-3087"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nagato, Keisuke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11825","display_name":"Catalysis and Oxidation Reactions","score":0.0007999999797903001,"subfield":{"id":"https://openalex.org/subfields/1503","display_name":"Catalysis"},"field":{"id":"https://openalex.org/fields/15","display_name":"Chemical Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.000699999975040555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7615000009536743},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.7045999765396118},{"id":"https://openalex.org/keywords/interpretation","display_name":"Interpretation (philosophy)","score":0.5745999813079834},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.5113999843597412},{"id":"https://openalex.org/keywords/strengths-and-weaknesses","display_name":"Strengths and weaknesses","score":0.4287000000476837},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.3880999982357025},{"id":"https://openalex.org/keywords/schematic","display_name":"Schematic","score":0.3544999957084656}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7615000009536743},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.7045999765396118},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6599000096321106},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.5745999813079834},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.5113999843597412},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.44920000433921814},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.4287000000476837},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4142000079154968},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3880999982357025},{"id":"https://openalex.org/C192328126","wikidata":"https://www.wikidata.org/wiki/Q4514647","display_name":"Schematic","level":2,"score":0.3544999957084656},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.34709998965263367},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.33239999413490295},{"id":"https://openalex.org/C44280652","wikidata":"https://www.wikidata.org/wiki/Q104837","display_name":"Phase (matter)","level":2,"score":0.329800009727478},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29490000009536743},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2865000069141388},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11414","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11414","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11414","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11414","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8937923908233643}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,115],"present":[1],"MaterialFigBench,":[2],"a":[3,78,210],"benchmark":[4,208],"dataset":[5,65],"designed":[6],"to":[7,17],"evaluate":[8,116],"the":[9,183,226],"ability":[10],"of":[11,27,67,81,96,162,228],"multimodal":[12,119,217],"large":[13],"language":[14],"models":[15,125],"(LLMs)":[16],"solve":[18],"university-level":[19],"materials":[20,74,163,221],"science":[21,75,164,222],"problems":[22,41,70],"that":[23,32],"require":[24],"accurate":[25],"interpretation":[26,161],"figures.":[28,165],"Unlike":[29],"existing":[30],"benchmarks":[31],"primarily":[33],"rely":[34],"on":[35,40,175],"textual":[36],"representations,":[37],"MaterialFigBench":[38,186],"focuses":[39],"in":[42,102,190,220],"which":[43],"figures":[44],"such":[45],"as":[46],"phase":[47,89,91],"diagrams,":[48,90],"stress-strain":[49],"curves,":[50],"Arrhenius":[51],"plots,":[52],"diffraction":[53],"patterns,":[54],"and":[55,93,123,130,137,159,195,212,223],"microstructural":[56],"schematics":[57],"are":[58,111,171],"indispensable":[59],"for":[60,215,224],"deriving":[61],"correct":[62,169],"answers.":[63],"The":[64,140],"consists":[66],"137":[68],"free-response":[69],"adapted":[71],"from":[72,106],"standard":[73],"textbooks,":[76],"covering":[77],"broad":[79],"range":[80],"topics":[82],"including":[83,121],"crystal":[84],"structures,":[85],"mechanical":[86],"properties,":[87],"diffusion,":[88],"transformations,":[92],"electronic":[94],"properties":[95],"materials.":[97],"To":[98],"address":[99],"unavoidable":[100],"ambiguity":[101],"reading":[103,182],"numerical":[104,193],"values":[105],"images,":[107],"expert-defined":[108],"answer":[109],"ranges":[110],"provided":[112,184],"where":[113,203],"appropriate.":[114],"several":[117],"state-of-the-art":[118],"LLMs,":[120],"ChatGPT":[122],"GPT":[124],"accessed":[126],"via":[127],"OpenAI":[128],"APIs,":[129],"analyze":[131],"their":[132],"performance":[133,204],"across":[134],"problem":[135,201],"categories":[136],"model":[138,149],"versions.":[139],"results":[141],"reveal":[142],"that,":[143],"although":[144],"overall":[145],"accuracy":[146],"improves":[147],"with":[148,155,231],"updates,":[150],"current":[151],"LLMs":[152,230],"still":[153],"struggle":[154],"genuine":[156],"visual":[157,191],"understanding":[158],"quantitative":[160],"In":[166],"many":[167],"cases,":[168],"answers":[170],"obtained":[172],"by":[173,181],"relying":[174],"memorized":[176],"domain":[177],"knowledge":[178],"rather":[179],"than":[180],"images.":[185],"highlights":[187],"persistent":[188],"weaknesses":[189],"reasoning,":[192],"precision,":[194],"significant-digit":[196],"handling,":[197],"while":[198],"also":[199],"identifying":[200],"types":[202],"has":[205],"improved.":[206],"This":[207],"provides":[209],"systematic":[211],"domain-specific":[213],"foundation":[214],"advancing":[216],"reasoning":[218],"capabilities":[219],"guiding":[225],"development":[227],"future":[229],"stronger":[232],"figure-based":[233],"understanding.":[234]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-14T00:00:00"}
