{"id":"https://openalex.org/W7148553698","doi":"https://doi.org/10.48550/arxiv.2604.00442","title":"Execution-Verified Reinforcement Learning for Optimization Modeling","display_name":"Execution-Verified Reinforcement Learning for Optimization Modeling","publication_year":2026,"publication_date":"2026-04-01","ids":{"openalex":"https://openalex.org/W7148553698","doi":"https://doi.org/10.48550/arxiv.2604.00442"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00442","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00442","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00442","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111239523","display_name":"Runda Guan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Guan, Runda","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028086891","display_name":"Xiangqing Shen","orcid":"https://orcid.org/0000-0002-9825-7853"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Xiangqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132818342","display_name":"Jiajun Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jiajun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132818542","display_name":"Yifan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yifan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132830388","display_name":"Jian Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132749036","display_name":"Rui Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5111239523"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.23180000483989716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.23180000483989716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.12020000070333481,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.10119999945163727,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.7860000133514404},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6158000230789185},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5910000205039978},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5127000212669373},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5101000070571899},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.47189998626708984},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4302000105381012},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.4212999939918518}],"concepts":[{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.7860000133514404},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7315000295639038},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6158000230789185},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5910000205039978},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5127000212669373},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5101000070571899},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.47189998626708984},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4302000105381012},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.428600013256073},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.4212999939918518},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41290000081062317},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.40630000829696655},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3935999870300293},{"id":"https://openalex.org/C3019612716","wikidata":"https://www.wikidata.org/wiki/Q730920","display_name":"Problem solver","level":2,"score":0.36800000071525574},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3483999967575073},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.3287999927997589},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C199622910","wikidata":"https://www.wikidata.org/wiki/Q1128326","display_name":"Constraint satisfaction problem","level":3,"score":0.26899999380111694},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2615000009536743}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00442","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00442","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00442","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00442","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5382109880447388,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automating":[0],"optimization":[1],"modeling":[2],"with":[3,25,49,99],"LLMs":[4,24,32],"is":[5],"a":[6,41,64,69,74,78,88,104],"promising":[7],"path":[8],"toward":[9],"scalable":[10],"decision":[11],"intelligence,":[12],"but":[13],"existing":[14],"approaches":[15],"either":[16],"rely":[17],"on":[18,22,132],"agentic":[19],"pipelines":[20],"built":[21],"closed-source":[23],"high":[26],"inference":[27],"latency,":[28],"or":[29,147],"fine-tune":[30],"smaller":[31],"using":[33],"costly":[34],"process":[35],"supervision":[36],"that":[37,62,144],"often":[38],"overfits":[39],"to":[40],"single":[42],"solver":[43,67,153,159,167],"API.":[44],"Inspired":[45],"by":[46,121,161],"reinforcement":[47],"learning":[48,60],"verifiable":[50],"rewards,":[51,97],"we":[52],"propose":[53],"Execution-Verified":[54],"Optimization":[55],"Modeling":[56],"(EVOM),":[57],"an":[58],"execution-verified":[59],"framework":[61],"treats":[63],"mathematical":[65],"programming":[66],"as":[68],"deterministic,":[70],"interactive":[71],"verifier.":[72],"Given":[73],"natural-language":[75],"problem":[76],"and":[77,91,101,117,136,141,155],"target":[79,166],"solver,":[80],"EVOM":[81,145],"generates":[82],"solver-specific":[83,129],"code,":[84],"executes":[85],"it":[86],"in":[87,103],"sandboxed":[89],"harness,":[90],"converts":[92],"execution":[93],"outcomes":[94],"into":[95],"scalar":[96],"optimized":[98],"GRPO":[100],"DAPO":[102],"closed-loop":[105],"generate-execute-feedback-update":[106],"process.":[107],"This":[108],"outcome-only":[109],"formulation":[110],"removes":[111],"the":[112,123,165],"need":[113],"for":[114],"process-level":[115],"supervision,":[116],"enables":[118],"cross-solver":[119],"generalization":[120],"switching":[122],"verification":[124],"environment":[125],"rather":[126],"than":[127],"reconstructing":[128],"datasets.":[130],"Experiments":[131],"NL4OPT,":[133],"MAMO,":[134],"IndustryOR,":[135],"OptiBench":[137],"across":[138],"Gurobi,":[139],"OR-Tools,":[140],"COPT":[142],"show":[143],"matches":[146],"outperforms":[148],"process-supervised":[149],"SFT,":[150],"supports":[151],"zero-shot":[152],"transfer,":[154],"achieves":[156],"effective":[157],"low-cost":[158],"adaptation":[160],"continuing":[162],"training":[163],"under":[164],"backend.":[168]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-03T00:00:00"}
