{"id":"https://openalex.org/W7160836128","doi":"https://doi.org/10.48550/arxiv.2605.07251","title":"Can Agents Price a Reaction? Evaluating LLMs on Chemical Cost Reasoning","display_name":"Can Agents Price a Reaction? Evaluating LLMs on Chemical Cost Reasoning","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160836128","doi":"https://doi.org/10.48550/arxiv.2605.07251"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.07251","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07251","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.07251","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135905650","display_name":"Yuyang Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yuyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135861099","display_name":"Yue Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135876454","display_name":"Shuaike Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Shuaike","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135895656","display_name":"Xujian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xujian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135886673","display_name":"Shuhao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shuhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115003568","display_name":"Qiyao Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Qiyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135860775","display_name":"Weichen Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Weichen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135900609","display_name":"Runtian Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Runtian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135853073","display_name":"Jian Feng Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135905959","display_name":"Xiangliang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiangliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135830822","display_name":"Olexandr Isayev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Isayev, Olexandr","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.8784999847412109,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.8784999847412109,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.014000000432133675,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.013000000268220901,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/procurement","display_name":"Procurement","score":0.5024999976158142},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4350999891757965},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.37400001287460327},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.3582000136375427},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.3474999964237213},{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.34439998865127563}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6295999884605408},{"id":"https://openalex.org/C201650216","wikidata":"https://www.wikidata.org/wiki/Q829492","display_name":"Procurement","level":2,"score":0.5024999976158142},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.4611000120639801},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4350999891757965},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.3885999917984009},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.37400001287460327},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3474999964237213},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.34439998865127563},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.32440000772476196},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.320499986410141},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C55282118","wikidata":"https://www.wikidata.org/wiki/Q252683","display_name":"Snapshot (computer storage)","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C9233905","wikidata":"https://www.wikidata.org/wiki/Q3276328","display_name":"Bidding","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2621999979019165}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.07251","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07251","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.07251","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07251","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"become":[5],"increasingly":[6],"capable":[7],"as":[8],"tool-using":[9],"agents,":[10],"with":[11,60,148,184],"benchmarks":[12],"spanning":[13],"diverse":[14],"general":[15],"agentic":[16],"tasks.":[17],"Yet":[18],"rigorous":[19],"evaluation":[20],"of":[21,97,120],"scientific":[22],"tool":[23,157,205],"use":[24],"remains":[25],"limited.":[26],"In":[27],"chemistry,":[28],"recent":[29],"agents":[30,154,169],"can":[31],"plan":[32],"syntheses":[33],"and":[34,85,110,117,124,144,151,181,203],"invoke":[35],"domain-specific":[36],"tools,":[37],"but":[38,161],"evaluations":[39],"often":[40],"rely":[41],"on":[42,178],"curated":[43],"demonstrations,":[44],"expert":[45],"assessment,":[46],"or":[47],"LLM-as-judge":[48],"scoring":[49,116],"rather":[50],"than":[51],"exact,":[52],"judge-free":[53],"ground":[54,73],"truth.":[55],"We":[56,92],"address":[57],"this":[58],"gap":[59],"chemical":[61,74,138],"procurement":[62],"cost":[63,87],"estimation,":[64],"a":[65,89,95,103],"practical":[66],"task":[67],"in":[68],"which":[69],"an":[70],"agent":[71],"must":[72],"identities,":[75],"retrieve":[76],"supplier":[77,112],"quotes,":[78,113],"select":[79],"valid":[80],"purchasable":[81],"packs,":[82],"normalize":[83],"quantities,":[84],"compute":[86],"from":[88,194],"reaction":[90],"description.":[91],"introduce":[93],"ChemCost,":[94],"benchmark":[96],"1,427":[98],"evaluable":[99],"reactions":[100],"grounded":[101],"to":[102],"frozen":[104],"pricing":[105],"snapshot":[106],"covering":[107],"2,261":[108],"chemicals":[109],"230,775":[111],"supporting":[114],"scalar":[115],"stage-level":[118],"diagnosis":[119],"grounding,":[121],"retrieval,":[122],"procurement,":[123],"arithmetic":[125],"failures.":[126],"To":[127],"evaluate":[128],"robustness,":[129],"we":[130],"further":[131,189],"construct":[132],"controlled":[133],"noise-injected":[134],"views":[135],"that":[136,156,191],"perturb":[137],"aliases,":[139],"quantity":[140],"expressions,":[141],"missing":[142],"fields,":[143],"input":[145],"formatting.":[146],"Experiments":[147],"frontier,":[149],"open-weight,":[150],"chemistry-specialized":[152],"LLM":[153],"show":[155],"access":[158],"is":[159],"necessary":[160],"insufficient":[162],"for":[163],"solving":[164],"the":[165],"task.":[166],"The":[167],"strongest":[168],"reach":[170],"only":[171],"50.6%":[172],"accuracy":[173],"within":[174],"25%":[175],"relative":[176],"error":[177],"clean":[179],"inputs":[180],"degrade":[182],"substantially":[183],"realistic":[185],"noise.":[186],"Stage-level":[187],"analysis":[188],"shows":[190],"failures":[192],"arise":[193],"brittle":[195],"parsing,":[196],"ineffective":[197],"evidence":[198],"integration,":[199],"invalid":[200],"pack":[201],"selection,":[202],"non-convergent":[204],"use.":[206]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
