{"id":"https://openalex.org/W4399850944","doi":"https://doi.org/10.1145/3656447","title":"Reward-Guided Synthesis of Intelligent Agents with Control Structures","display_name":"Reward-Guided Synthesis of Intelligent Agents with Control Structures","publication_year":2024,"publication_date":"2024-06-20","ids":{"openalex":"https://openalex.org/W4399850944","doi":"https://doi.org/10.1145/3656447"},"language":"en","primary_location":{"id":"doi:10.1145/3656447","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656447","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3656447","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3656447","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055115915","display_name":"Guofeng Cui","orcid":"https://orcid.org/0000-0002-7994-915X"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Guofeng Cui","raw_affiliation_strings":["Rutgers University, New Brunswick, USA"],"raw_orcid":"https://orcid.org/0000-0002-7994-915X","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066740799","display_name":"Yuning Wang","orcid":"https://orcid.org/0009-0000-4317-9758"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuning Wang","raw_affiliation_strings":["Rutgers University, New Brunswick, USA"],"raw_orcid":"https://orcid.org/0009-0000-4317-9758","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014081501","display_name":"Wenjie Qiu","orcid":"https://orcid.org/0000-0002-2271-6443"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenjie Qiu","raw_affiliation_strings":["Rutgers University, New Brunswick, USA"],"raw_orcid":"https://orcid.org/0000-0002-2271-6443","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004145814","display_name":"He Zhu","orcid":"https://orcid.org/0000-0001-9606-150X"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"He Zhu","raw_affiliation_strings":["Rutgers University, New Brunswick, USA"],"raw_orcid":"https://orcid.org/0000-0001-9606-150X","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, USA","institution_ids":["https://openalex.org/I102322142"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5055115915"],"corresponding_institution_ids":["https://openalex.org/I102322142"],"apc_list":null,"apc_paid":null,"fwci":0.6294,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.72711525,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"8","issue":"PLDI","first_page":"1730","last_page":"1754"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9599000215530396,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5099884271621704},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3799149692058563},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.3698841631412506},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.36496034264564514},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2243664562702179}],"concepts":[{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5099884271621704},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3799149692058563},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.3698841631412506},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.36496034264564514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2243664562702179}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3656447","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656447","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3656447","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3656447","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656447","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3656447","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7849624421","display_name":null,"funder_award_id":"CCF-2124155","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399850944.pdf"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W1554233645","https://openalex.org/W1586162706","https://openalex.org/W1625390266","https://openalex.org/W1680797894","https://openalex.org/W1858945639","https://openalex.org/W2026775511","https://openalex.org/W2089561656","https://openalex.org/W2127972144","https://openalex.org/W2158782408","https://openalex.org/W2165005075","https://openalex.org/W2342662072","https://openalex.org/W2550471858","https://openalex.org/W2560662850","https://openalex.org/W2560674852","https://openalex.org/W2561055248","https://openalex.org/W2604662268","https://openalex.org/W2626990892","https://openalex.org/W2747329762","https://openalex.org/W2765742677","https://openalex.org/W2796284132","https://openalex.org/W2922007426","https://openalex.org/W2963037989","https://openalex.org/W2996811470","https://openalex.org/W3009994680","https://openalex.org/W3034296891","https://openalex.org/W3043557120","https://openalex.org/W3092881338","https://openalex.org/W3173049816","https://openalex.org/W4205130889","https://openalex.org/W4310895557","https://openalex.org/W4386811828","https://openalex.org/W4388483808","https://openalex.org/W4388505012","https://openalex.org/W6697544457","https://openalex.org/W6745915375","https://openalex.org/W6911368834"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"has":[4],"led":[5],"to":[6,20,66,74,86,93,122,138],"encouraging":[7],"successes":[8],"in":[9,26,38],"numerous":[10],"challenging":[11,160],"robotics":[12],"applications.":[13],"However,":[14],"the":[15,27,56,88,95,125],"lack":[16],"of":[17,29,90,113],"inductive":[18],"biases":[19],"support":[21],"logic":[22],"deduction":[23],"and":[24,116,131,154,168,175],"generalization":[25],"representation":[28],"a":[30,82,100],"deep":[31],"RL":[32,53,64,77,142,152],"model":[33],"causes":[34],"it":[35],"less":[36],"effective":[37],"exploring":[39],"complex":[40],"long-horizon":[41],"robot-control":[42,72,133],"tasks":[43,134,162],"with":[44,105],"sparse":[45],"reward":[46],"signals.":[47],"Existing":[48],"program":[49,68,91,156],"synthesis":[50,84,92,103,112,118,157],"algorithms":[51,65,153],"for":[52,109,119,128],"problems":[54],"inherit":[55],"same":[57],"limitation,":[58],"as":[59],"they":[60],"either":[61],"adapt":[62],"conventional":[63,141],"guide":[67],"search":[69,107],"or":[70],"synthesize":[71],"programs":[73],"imitate":[75],"an":[76],"model.":[78],"We":[79,98],"propose":[80],"ReGuS,":[81],"reward-guided":[83],"paradigm,":[85],"unlock":[87],"potential":[89],"overcome":[94],"exploration":[96,126],"challenges.":[97],"develop":[99],"novel":[101],"hierarchical":[102],"algorithm":[104],"decomposed":[106],"space":[108,127],"loops,":[110],"on-demand":[111],"conditional":[114],"statements,":[115],"curriculum":[117],"procedure":[120],"calls,":[121],"effectively":[123],"compress":[124],"long-horizon,":[129],"multi-stage,":[130],"procedural":[132],"that":[135,147],"are":[136],"difficult":[137],"address":[139],"by":[140],"techniques.":[143],"Experiment":[144],"results":[145],"demonstrate":[146],"ReGuS":[148],"significantly":[149],"outperforms":[150],"state-of-the-art":[151],"standard":[155],"baselines":[158],"on":[159],"robot":[161],"including":[163],"autonomous":[164],"driving,":[165],"locomotion":[166],"control,":[167],"object":[169],"manipulation.":[170],"CCS":[171],"Concepts:":[172],"\u2022":[173],"Software":[174],"its":[176],"engineering":[177],"\u2192":[178],"Automatic":[179],"programming.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
