{"id":"https://openalex.org/W7138428186","doi":"https://doi.org/10.1609/aaai.v40i40.40711","title":"DeepPhy: Benchmarking Agentic VLMs on Physical Reasoning","display_name":"DeepPhy: Benchmarking Agentic VLMs on Physical Reasoning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138428186","doi":"https://doi.org/10.1609/aaai.v40i40.40711"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i40.40711","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i40.40711","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40711/44672","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40711/44672","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129749102","display_name":"Xinrun Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xinrun Xu","raw_affiliation_strings":["Taobao & Tmall Group of Alibaba\nUniversity of the Chinese Academy of Sciences\nInstitute of Software, Chinese Academy of Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Taobao & Tmall Group of Alibaba\nUniversity of the Chinese Academy of Sciences\nInstitute of Software, Chinese Academy of Science","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129648717","display_name":"Pi Bu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pi Bu","raw_affiliation_strings":["Taobao & Tmall Group of Alibaba"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Taobao & Tmall Group of Alibaba","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129687308","display_name":"Ye Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Wang","raw_affiliation_strings":["Renmin University of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Renmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015011965","display_name":"B\u00f6rje F. Karlsson","orcid":"https://orcid.org/0000-0001-8925-360X"},"institutions":[{"id":"https://openalex.org/I2699952","display_name":"Pontif\u00edcia Universidade Cat\u00f3lica do Rio de Janeiro","ror":"https://ror.org/01dg47b60","country_code":"BR","type":"education","lineage":["https://openalex.org/I2699952"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"B\u00f6rje F. Karlsson","raw_affiliation_strings":["Informatics Department, PUC-Rio"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Informatics Department, PUC-Rio","institution_ids":["https://openalex.org/I2699952"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129722463","display_name":"Ziming Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziming Wang","raw_affiliation_strings":["Taobao & Tmall Group of Alibaba"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Taobao & Tmall Group of Alibaba","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059065177","display_name":"Tengtao Song","orcid":"https://orcid.org/0009-0000-7947-3473"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tengtao Song","raw_affiliation_strings":["Taobao & Tmall Group of Alibaba"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Taobao & Tmall Group of Alibaba","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129722128","display_name":"Qi Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qi Zhu","raw_affiliation_strings":["Taobao & Tmall Group of Alibaba"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Taobao & Tmall Group of Alibaba","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129653548","display_name":"Jun Song","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Song","raw_affiliation_strings":["Taobao & Tmall Group of Alibaba"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Taobao & Tmall Group of Alibaba","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129719938","display_name":"Zhiming Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiming Ding","raw_affiliation_strings":["Institute of Software, Chinese Academy of Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Science","institution_ids":["https://openalex.org/I4210128818"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129715522","display_name":"Bo Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Zheng","raw_affiliation_strings":["Taobao & Tmall Group of Alibaba"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Taobao & Tmall Group of Alibaba","institution_ids":["https://openalex.org/I4210095624"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5129749102"],"corresponding_institution_ids":["https://openalex.org/I4210095624"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.72818312,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"40","first_page":"34160","last_page":"34168"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9692999720573425,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9692999720573425,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.003700000001117587,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.001500000013038516,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6380000114440918},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5514000058174133},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5249000191688538},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.49970000982284546},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4851999878883362},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.43050000071525574},{"id":"https://openalex.org/keywords/knowledge-representation-and-reasoning","display_name":"Knowledge representation and reasoning","score":0.3619000017642975},{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.35190001130104065}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6412000060081482},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6380000114440918},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5514000058174133},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5249000191688538},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5005999803543091},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.49970000982284546},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4851999878883362},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.43050000071525574},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.3619000017642975},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.35190001130104065},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3452000021934509},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.34470000863075256},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3391999900341034},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C116672817","wikidata":"https://www.wikidata.org/wiki/Q1454986","display_name":"Physical system","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C5065155","wikidata":"https://www.wikidata.org/wiki/Q1185775","display_name":"Frame problem","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.265500009059906},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.26089999079704285},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.2590999901294708}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i40.40711","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i40.40711","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40711/44672","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i40.40711","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i40.40711","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40711/44672","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.49518918991088867,"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"},{"id":"https://openalex.org/F4320335644","display_name":"Institute of Software, Chinese Academy of Sciences","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138428186.pdf","grobid_xml":"https://content.openalex.org/works/W7138428186.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Although":[0],"Vision":[1],"Language":[2],"Models":[3],"(VLMs)":[4],"exhibit":[5],"strong":[6],"perceptual":[7],"abilities":[8],"and":[9,19,42,84,107],"impressive":[10],"visual":[11],"reasoning,":[12,39],"they":[13],"struggle":[14,119],"with":[15],"attention":[16],"to":[17,28,79,120],"detail":[18],"precise":[20],"action":[21],"planning":[22],"in":[23,60],"complex,":[24],"dynamic":[25],"environments,":[26],"leading":[27],"subpar":[29],"performance.":[30],"Real-world":[31],"tasks":[32],"typically":[33],"require":[34],"complex":[35],"interactions,":[36],"advanced":[37],"spatial":[38],"long-term":[40],"planning,":[41],"continuous":[43],"strategy":[44],"refinement,":[45],"usually":[46],"necessitating":[47],"understanding":[48,83],"the":[49,53],"physics":[50],"rules":[51],"of":[52,93,103],"target":[54],"scenario.":[55],"However,":[56],"evaluating":[57],"these":[58],"capabilities":[59],"real-world":[61],"scenarios":[62],"is":[63],"often":[64],"prohibitively":[65],"expensive.":[66],"To":[67],"bridge":[68],"this":[69],"gap,":[70],"we":[71],"introduce":[72],"DeepPHY,":[73],"a":[74,91],"novel":[75],"benchmark":[76],"framework":[77],"designed":[78],"systematically":[80],"evaluate":[81],"VLMs'":[82],"reasoning":[85,101],"about":[86],"fundamental":[87],"physical":[88,100,123],"principles":[89],"through":[90],"series":[92],"challenging":[94],"simulated":[95],"environments.":[96],"DeepPHY":[97],"integrates":[98],"multiple":[99],"environments":[102],"varying":[104],"difficulty":[105],"levels":[106],"incorporates":[108],"fine-grained":[109],"evaluation":[110,113],"metrics.":[111],"Our":[112],"finds":[114],"that":[115],"even":[116],"state-of-the-art":[117],"VLMs":[118],"translate":[121],"descriptive":[122],"knowledge":[124],"into":[125],"precise,":[126],"predictive":[127],"control.":[128]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
