{"id":"https://openalex.org/W7137892616","doi":"https://doi.org/10.1609/aaai.v40i1.36983","title":"OR-R1: Automating Modeling and Solving of Operations Research Optimization Problem via Test-Time Reinforcement Learning","display_name":"OR-R1: Automating Modeling and Solving of Operations Research Optimization Problem via Test-Time Reinforcement Learning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137892616","doi":"https://doi.org/10.1609/aaai.v40i1.36983"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i1.36983","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i1.36983","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/36983/40945","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/36983/40945","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129651174","display_name":"Zezhen Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zezhen Ding","raw_affiliation_strings":["The Hong Kong University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129738799","display_name":"Zhen Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhen Tan","raw_affiliation_strings":["Arizona State University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Arizona State University","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120788484","display_name":"Jiheng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jiheng Zhang","raw_affiliation_strings":["The Hong Kong University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129694069","display_name":"Tianlong Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianlong Chen","raw_affiliation_strings":["University of North Carolina at Chapel Hill"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina at Chapel Hill","institution_ids":["https://openalex.org/I114027177"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05952381,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"1","first_page":"228","last_page":"236"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.18369999527931213,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.18369999527931213,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.09989999979734421,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.059700001031160355,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7303000092506409},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5620999932289124},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5515999794006348},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.529699981212616},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.49470001459121704},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4097000062465668},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.39579999446868896},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3808000087738037}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7878999710083008},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7303000092506409},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5620999932289124},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5515999794006348},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.529699981212616},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.49470001459121704},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48069998621940613},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4733000099658966},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4097000062465668},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.39579999446868896},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3808000087738037},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3693000078201294},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3244999945163727},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2800000011920929},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i1.36983","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i1.36983","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/36983/40945","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i1.36983","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i1.36983","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/36983/40945","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7133738398551941}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137892616.pdf","grobid_xml":"https://content.openalex.org/works/W7137892616.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Optimization":[0,133],"modeling":[1,92,244],"and":[2,29,53,76,93,113,126,145,215,236,245,250],"solving":[3,162,181],"are":[4,60],"fundamental":[5],"to":[6,101,140,185,221],"the":[7,18,50,103,106,124,169,210,248],"application":[8],"of":[9,20,56,67,164],"Operations":[10],"Research":[11],"(OR)":[12],"in":[13,39,73,206],"real-world":[14,227],"decision":[15],"making,":[16],"yet":[17],"process":[19],"translating":[21],"natural":[22],"language":[23,41],"problem":[24,111,243],"descriptions":[25],"into":[26],"formal":[27],"models":[28,42],"solver":[30],"code":[31,114],"remains":[32],"highly":[33],"expertise":[34,249],"intensive.":[35],"While":[36],"recent":[37],"advances":[38],"large":[40],"(LLMs)":[43],"have":[44],"opened":[45],"new":[46],"opportunities":[47],"for":[48,89,110,149,239,253],"automation,":[49],"generalization":[51],"ability":[52],"data":[54,148,171,251],"efficiency":[55],"existing":[57],"LLM-based":[58],"methods":[59,175],"still":[61],"limited,":[62],"asmost":[63],"require":[64],"vast":[65],"amounts":[66],"annotated":[68],"or":[69],"synthetic":[70,170,197],"data,":[71],"resulting":[72],"high":[74],"costs":[75],"scalability":[77],"barriers.":[78],"In":[79,120],"this":[80],"work,":[81],"we":[82],"present":[83],"OR-R1,":[84],"a":[85,233],"data-efficient":[86],"training":[87],"framework":[88],"automated":[90,240],"optimization":[91,242],"solving.":[94],"OR-R1":[95,139,155,188,231],"first":[96],"employs":[97],"supervised":[98],"fine-tuning":[99],"(SFT)":[100],"help":[102],"model":[104],"acquire":[105],"essential":[107],"reasoning":[108],"patterns":[109],"formulation":[112],"generation":[115],"from":[116,219],"limited":[117],"labeled":[118,144],"data.":[119],"addition,":[121],"it":[122],"improves":[123],"capability":[125],"consistency":[127],"through":[128],"Test-Time":[129],"Group":[130],"Relative":[131],"Policy":[132],"(TGRPO).":[134],"This":[135],"two-stage":[136],"design":[137],"enables":[138],"leverage":[141],"both":[142],"scarce":[143],"abundant":[146],"unlabeled":[147],"effective":[150],"learning.":[151],"Experiments":[152],"show":[153],"that":[154,230],"achieves":[156],"state-of-the-art":[157],"performance":[158,218],"with":[159,194],"an":[160,202],"average":[161],"accuracy":[163,182],"67.7%,":[165],"using":[166],"only":[167],"1/10":[168],"required":[172],"by":[173,183,191],"prior":[174],"such":[176],"as":[177],"ORLM,":[178],"exceeding":[179],"ORLM\u2019s":[180],"up":[184],"4.2%.":[186],"Remarkably,":[187],"outperforms":[189],"ORLM":[190],"over":[192],"2.4%":[193],"just":[195],"100":[196],"samples.":[198],"Furthermore,":[199],"TGRPO":[200],"contributes":[201],"additional":[203],"3.1%\u20136.4%":[204],"improvement":[205],"accuracy,":[207],"significantly":[208],"narrowing":[209],"gap":[211],"between":[212],"single-attempt":[213],"(Pass@1)":[214],"multi-attempt":[216],"(Pass@8)":[217],"13%":[220],"7%.":[222],"Extensive":[223],"evaluations":[224],"across":[225],"diverse":[226],"benchmarks":[228],"demonstrate":[229],"provides":[232],"robust,":[234],"scalable,":[235],"cost-effective":[237],"solution":[238],"OR":[241,255],"solving,":[246],"lowering":[247],"barriers":[252],"industrial":[254],"applications.":[256]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
