{"id":"https://openalex.org/W7160909247","doi":"https://doi.org/10.48550/arxiv.2605.08941","title":"MDGYM: Benchmarking AI Agents on Molecular Simulations","display_name":"MDGYM: Benchmarking AI Agents on Molecular Simulations","publication_year":2026,"publication_date":"2026-05-09","ids":{"openalex":"https://openalex.org/W7160909247","doi":"https://doi.org/10.48550/arxiv.2605.08941"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.08941","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08941","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.08941","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135974472","display_name":"Vinay Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Vinay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103140974","display_name":"Satyendra Rajput","orcid":"https://orcid.org/0009-0007-3097-7444"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rajput, Satyendra","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135986642","display_name":"Mausam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mausam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135948054","display_name":"N. M. Anoop Krishnan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krishnan, N. M. Anoop","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6279000043869019,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6279000043869019,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.19609999656677246,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.012199999764561653,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.5870000123977661},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5774999856948853},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5758000016212463},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.498199999332428},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4027000069618225},{"id":"https://openalex.org/keywords/fluent","display_name":"Fluent","score":0.35899999737739563}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6758999824523926},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.5870000123977661},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5774999856948853},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5758000016212463},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.498199999332428},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41830000281333923},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4027000069618225},{"id":"https://openalex.org/C2780411076","wikidata":"https://www.wikidata.org/wiki/Q5462653","display_name":"Fluent","level":3,"score":0.35899999737739563},{"id":"https://openalex.org/C116672817","wikidata":"https://www.wikidata.org/wiki/Q1454986","display_name":"Physical system","level":2,"score":0.3156000077724457},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3075999915599823},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28929999470710754},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2757999897003174},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.26350000500679016},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.25440001487731934}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.08941","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08941","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.08941","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08941","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7918386459350586,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"promise":[1],"of":[2,72,119,135],"AI-driven":[3],"scientific":[4],"discovery":[5],"hinges":[6],"on":[7],"whether":[8],"AI":[9],"agents":[10,138],"can":[11],"autonomously":[12],"design":[13],"and":[14,43,51,58,65,79,100,106],"execute":[15],"the":[16,113,153],"computational":[17],"workflows":[18],"that":[19,108,181],"underpin":[20],"modern":[21],"science.":[22],"Molecular":[23],"dynamics":[24],"(MD)":[25],"simulation":[26,141],"presents":[27],"a":[28,70,132],"natural":[29],"test":[30],"bed":[31],"to":[32,188],"stress-test":[33],"this":[34],"claim;":[35],"it":[36],"requires":[37],"translating":[38],"physical":[39,63,190],"intuition":[40],"into":[41],"syntactically":[42],"semantically":[44],"correct":[45],"input":[46],"scripts,":[47],"reasoning":[48],"about":[49],"initial":[50],"boundary":[52],"conditions,":[53],"diagnosing":[54],"numerically":[55],"unstable":[56,146],"trajectories,":[57],"interpreting":[59],"outputs":[60,150],"against":[61],"known":[62],"behavior":[64],"laws.":[66],"We":[67,91],"introduce":[68],"MDGYM,":[69],"benchmark":[71],"169":[73],"expert-curated":[74],"MD":[75,84],"simulations":[76],"spanning":[77],"LAMMPS":[78],"GROMACS,":[80],"two":[81],"widely":[82],"used":[83],"packages,":[85],"across":[86],"three":[87,93],"increasing":[88],"difficulty":[89],"levels.":[90],"evaluate":[92],"agentic":[94],"frameworks":[95],"--":[96,102,137],"Claude":[97],"Code,":[98],"Codex,":[99],"OpenHands":[101],"with":[103,122],"four":[104],"LLMs,":[105],"find":[107],"all":[109],"perform":[110],"poorly:":[111],"even":[112],"strongest":[114],"agent":[115],"solves":[116],"only":[117],"21\\%":[118],"easy-level":[120],"tasks,":[121],"less":[123],"than":[124,161],"10\\%":[125],"at":[126],"higher":[127],"difficulties.":[128],"Trajectory":[129],"analysis":[130],"reveals":[131],"characteristic":[133],"pattern":[134],"failure":[136,167],"successfully":[139],"invoke":[140],"machinery":[142],"but":[143],"produce":[144],"physically":[145],"configurations,":[147],"fabricate":[148],"numerical":[149],"without":[151],"executing":[152],"underlying":[154],"computation,":[155],"or":[156],"abandon":[157],"tasks":[158],"prematurely":[159],"rather":[160],"iterating":[162],"through":[163],"simulation-specific":[164],"errors.":[165],"These":[166],"modes":[168],"are":[169],"qualitatively":[170],"distinct":[171],"from":[172],"those":[173],"observed":[174],"in":[175],"general":[176],"software":[177],"engineering":[178],"benchmarks,":[179],"indicating":[180],"fluent":[182],"code":[183],"generation":[184],"does":[185],"not":[186],"transfer":[187],"grounded":[189],"reasoning.":[191]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
