{"id":"https://openalex.org/W7151504492","doi":"https://doi.org/10.48550/arxiv.2604.03893","title":"FeynmanBench: Benchmarking Multimodal LLMs on Diagrammatic Physics Reasoning","display_name":"FeynmanBench: Benchmarking Multimodal LLMs on Diagrammatic Physics Reasoning","publication_year":2026,"publication_date":"2026-04-04","ids":{"openalex":"https://openalex.org/W7151504492","doi":"https://doi.org/10.48550/arxiv.2604.03893"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03893","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03893","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133124018","display_name":"Zeyu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Zeyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091947214","display_name":"Xiaogang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jingye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133067807","display_name":"Peiyao Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiaogang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000206292","display_name":"Qinhao Kong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Peiyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133110731","display_name":"Ben Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kong, Qinhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127310476","display_name":"Chengliang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ben","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127383435","display_name":"Zichao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Chengliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133112159","display_name":"Bing Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zichao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133089454","display_name":"Hu Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Bing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Wei, Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Hu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5133124018"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.26510000228881836,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.26510000228881836,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.17870000004768372,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.13169999420642853,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/diagrammatic-reasoning","display_name":"Diagrammatic reasoning","score":0.9042999744415283},{"id":"https://openalex.org/keywords/feynman-diagram","display_name":"Feynman diagram","score":0.6348000168800354},{"id":"https://openalex.org/keywords/notation","display_name":"Notation","score":0.46239998936653137},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.4081999957561493},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3555999994277954},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.34619998931884766},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.34150001406669617}],"concepts":[{"id":"https://openalex.org/C106624574","wikidata":"https://www.wikidata.org/wiki/Q5270387","display_name":"Diagrammatic reasoning","level":2,"score":0.9042999744415283},{"id":"https://openalex.org/C65574998","wikidata":"https://www.wikidata.org/wiki/Q386272","display_name":"Feynman diagram","level":2,"score":0.6348000168800354},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.559499979019165},{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.46239998936653137},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.41449999809265137},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.4081999957561493},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4041999876499176},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.34619998931884766},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.34150001406669617},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3253999948501587},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.28870001435279846},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C65059942","wikidata":"https://www.wikidata.org/wiki/Q270105","display_name":"Argumentation theory","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.25699999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03893","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03893","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Life in Land","score":0.503134548664093,"id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"multimodal":[1,129,169],"benchmarks":[2],"for":[3,168],"scientific":[4,173,184],"reasoning":[5,170],"primarily":[6],"evaluate":[7],"local":[8,141],"information":[9],"extraction":[10],"--":[11,96],"models":[12,27,89,137],"recognize":[13],"symbols":[14],"and":[15,17,43,50,70,87,105,113,120,144,155,175],"values":[16],"then":[18],"perform":[19],"textual":[20,85],"inference.":[21],"They":[22],"do":[23],"not":[24],"assess":[25],"whether":[26],"can":[28],"reason":[29],"over":[30,59,171],"the":[31,44,67,74,92,106,117],"global":[32],"structural":[33],"properties":[34],"of":[35,58,73,179],"formal":[36,172],"diagrams,":[37,118],"such":[38],"as":[39],"topology,":[40],"conservation":[41],"constraints,":[42],"consistent":[45,134],"mapping":[46],"between":[47],"visual":[48],"patterns":[49],"algebraic":[51,160],"expressions.":[52],"We":[53],"introduce":[54],"FeynmanBench,":[55],"a":[56,80,133,165],"benchmark":[57],"2,000":[60],"tasks":[61],"centered":[62],"on":[63,140,151,158],"Feynman":[64],"diagrams":[65,174],"spanning":[66],"electromagnetic,":[68],"weak,":[69],"strong":[71],"interactions":[72],"Standard":[75],"Model.":[76],"Each":[77],"instance":[78],"couples":[79],"diagram":[81],"image":[82],"with":[83],"minimal":[84],"conventions":[86],"requires":[88],"to":[90,149],"recover":[91],"full":[93,159],"physical":[94],"content":[95],"vertex":[97],"inventory,":[98],"propagator":[99,145],"types,":[100],"topological":[101,152],"connectivity,":[102],"momentum":[103],"routing,":[104],"complete":[107],"scattering":[108],"amplitude.":[109],"An":[110],"automated":[111],"generation":[112],"verification":[114],"pipeline":[115],"produces":[116],"annotations,":[119],"reference":[121],"answers":[122],"under":[123],"standardized":[124],"rules.":[125],"Evaluating":[126],"19":[127],"state-of-the-art":[128],"LLMs,":[130],"we":[131],"find":[132],"failure":[135],"pattern:":[136],"achieve":[138],"70--95\\%":[139],"recognition":[142],"(vertex":[143],"identification)":[146],"but":[147],"collapse":[148],"13--17\\%":[150],"reconstruction":[153],"(CP3),":[154],"near":[156],"zero":[157],"derivation":[161],"(CP5).":[162],"FeynmanBench":[163],"offers":[164],"controlled":[166],"testbed":[167],"highlights":[176],"fundamental":[177],"limitations":[178],"current":[180],"architectures":[181],"in":[182],"topology-sensitive":[183],"reasoning.":[185]},"counts_by_year":[],"updated_date":"2026-06-03T06:16:58.514037","created_date":"2026-04-08T00:00:00"}
