{"id":"https://openalex.org/W4413925494","doi":"https://doi.org/10.1109/icra55743.2025.11128105","title":"Retrieval-Augmented Hierarchical in-Context Reinforcement Learning and Hindsight Modular Reflections for Task Planning with LLMs","display_name":"Retrieval-Augmented Hierarchical in-Context Reinforcement Learning and Hindsight Modular Reflections for Task Planning with LLMs","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W4413925494","doi":"https://doi.org/10.1109/icra55743.2025.11128105"},"language":"en","primary_location":{"id":"doi:10.1109/icra55743.2025.11128105","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128105","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087094568","display_name":"Chuanneng Sun","orcid":"https://orcid.org/0000-0001-7524-9044"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chuanneng Sun","raw_affiliation_strings":["Rutgers University-New Brunswick,Dept. of Electrical and Computer Engineering,NJ,USA"],"affiliations":[{"raw_affiliation_string":"Rutgers University-New Brunswick,Dept. of Electrical and Computer Engineering,NJ,USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072507819","display_name":"Songjun Huang","orcid":"https://orcid.org/0009-0005-9900-0530"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Songjun Huang","raw_affiliation_strings":["Rutgers University-New Brunswick,Dept. of Electrical and Computer Engineering,NJ,USA"],"affiliations":[{"raw_affiliation_string":"Rutgers University-New Brunswick,Dept. of Electrical and Computer Engineering,NJ,USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039805677","display_name":"Haiqiao Liu","orcid":"https://orcid.org/0000-0001-7818-5242"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haiqiao Liu","raw_affiliation_strings":["Rutgers University-New Brunswick,Dept. of Civil and Environmental Engineering,NJ,USA"],"affiliations":[{"raw_affiliation_string":"Rutgers University-New Brunswick,Dept. of Civil and Environmental Engineering,NJ,USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103091593","display_name":"Jie Gong","orcid":"https://orcid.org/0000-0002-7915-7304"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jie Gong","raw_affiliation_strings":["Rutgers University-New Brunswick,Dept. of Civil and Environmental Engineering,NJ,USA"],"affiliations":[{"raw_affiliation_string":"Rutgers University-New Brunswick,Dept. of Civil and Environmental Engineering,NJ,USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007041249","display_name":"Dario Pompili","orcid":"https://orcid.org/0000-0002-5365-509X"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dario Pompili","raw_affiliation_strings":["Rutgers University-New Brunswick,Dept. of Electrical and Computer Engineering,NJ,USA"],"affiliations":[{"raw_affiliation_string":"Rutgers University-New Brunswick,Dept. of Electrical and Computer Engineering,NJ,USA","institution_ids":["https://openalex.org/I102322142"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5087094568"],"corresponding_institution_ids":["https://openalex.org/I102322142"],"apc_list":null,"apc_paid":null,"fwci":5.7199,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.96084528,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1217","last_page":"1224"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9689000248908997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9689000248908997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9527000188827515,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hindsight-bias","display_name":"Hindsight bias","score":0.8835173845291138},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.7275701761245728},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6805869936943054},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6473705172538757},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6380890011787415},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6073213815689087},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3353663682937622},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.21272385120391846},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1845376193523407},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1351768970489502},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.09097287058830261},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06993177533149719}],"concepts":[{"id":"https://openalex.org/C10347200","wikidata":"https://www.wikidata.org/wiki/Q1960297","display_name":"Hindsight bias","level":2,"score":0.8835173845291138},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.7275701761245728},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6805869936943054},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6473705172538757},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6380890011787415},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6073213815689087},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3353663682937622},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.21272385120391846},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1845376193523407},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1351768970489502},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.09097287058830261},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06993177533149719},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra55743.2025.11128105","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128105","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8623549261","display_name":null,"funder_award_id":"CCF-1937403","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2889787757","https://openalex.org/W2964227312","https://openalex.org/W3043699236","https://openalex.org/W3133150469","https://openalex.org/W4383108432","https://openalex.org/W4389520747","https://openalex.org/W4390874280","https://openalex.org/W4393160747","https://openalex.org/W4401043434","https://openalex.org/W4401413973","https://openalex.org/W4401414087","https://openalex.org/W4401414318","https://openalex.org/W4401414944","https://openalex.org/W4401415105","https://openalex.org/W4401416612","https://openalex.org/W4401508458","https://openalex.org/W4401508531","https://openalex.org/W4402354083","https://openalex.org/W4403760121","https://openalex.org/W4406138276","https://openalex.org/W4406355107","https://openalex.org/W4413349819"],"related_works":["https://openalex.org/W3197854638","https://openalex.org/W3140454661","https://openalex.org/W4245029315","https://openalex.org/W1492315459","https://openalex.org/W1512434910","https://openalex.org/W2540910169","https://openalex.org/W3148904318","https://openalex.org/W2139970489","https://openalex.org/W2022803902","https://openalex.org/W2105474389"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"demonstrated":[5],"remarkable":[6],"abilities":[7],"in":[8,18,48,80,121,139,148],"various":[9],"language":[10],"tasks,":[11],"making":[12],"them":[13],"promising":[14],"candidates":[15],"for":[16],"decision-making":[17,115],"robotics.":[19],"Inspired":[20],"by":[21,57,65,182],"Hierarchical":[22,29],"Reinforcement":[23],"Learning":[24,32],"(HRL),":[25],"we":[26,83,97],"propose":[27,84],"Retrieval-Augmented":[28],"in-context":[30],"reinforcement":[31],"(RAHL),":[33],"a":[34,50,58],"novel":[35],"framework":[36],"that":[37,133],"decomposes":[38],"complex":[39,51],"tasks":[40],"into":[41,55],"sub-tasks":[42,56],"using":[43,105],"an":[44,137],"LLM-based":[45],"high-level":[46,59],"policy,":[47],"which":[49,166],"task":[52],"is":[53,167],"decomposed":[54],"policy":[60,72],"on-the-fly.":[61],"The":[62],"sub-tasks,":[63],"defined":[64],"goals,":[66],"are":[67],"assigned":[68],"to":[69,73,108,153,169,178],"the":[70,77,94,99,114,118,130,140,161,172,183],"low-level":[71],"complete.":[74],"To":[75],"improve":[76,109],"agent's":[78],"performance":[79,141],"multi-episode":[81],"execution,":[82],"Hindsight":[85],"Modular":[86],"Reflection":[87],"(HMR),":[88],"where,":[89],"instead":[90],"of":[91,117],"reflecting":[92],"on":[93,102,160],"full":[95],"trajectory,":[96],"let":[98],"agent":[100],"reflect":[101],"shorter":[103],"sub-trajectories":[104],"intermediate":[106],"goals":[107],"reflection":[110],"efficiency.":[111],"We":[112,156],"evaluated":[113],"ability":[116],"proposed":[119],"RAHL":[120,134,159],"three":[122],"benchmark":[123],"environments,":[124],"ALFWorld,":[125],"Webshop,":[126],"and":[127,146,176],"HotpotQA,":[128],"where":[129],"results":[131],"show":[132],"can":[135],"achieve":[136],"improvement":[138],"of,":[142],"respectively,":[143],"9%,":[144],"42%,":[145],"10%":[147],"5":[149],"execution":[150],"episodes":[151],"compared":[152],"state-of-the-art":[154],"baselines.":[155],"also":[157],"implemented":[158],"Boston":[162],"Dynamics":[163],"SPOT":[164],"robot,":[165],"shown":[168],"effectively":[170],"scan":[171],"environment,":[173],"find":[174],"entrances,":[175],"navigate":[177],"new":[179],"rooms":[180],"controlled":[181],"LLM":[184],"policy.":[185]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
