{"id":"https://openalex.org/W4401416400","doi":"https://doi.org/10.1109/icra57147.2024.10611711","title":"IQL-TD-MPC: Implicit Q-Learning for Hierarchical Model Predictive Control","display_name":"IQL-TD-MPC: Implicit Q-Learning for Hierarchical Model Predictive Control","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401416400","doi":"https://doi.org/10.1109/icra57147.2024.10611711"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10611711","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611711","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076794038","display_name":"Rohan Chitnis","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rohan Chitnis","raw_affiliation_strings":["Meta AI, FAIR"],"affiliations":[{"raw_affiliation_string":"Meta AI, FAIR","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111305989","display_name":"Yingchen Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yingchen Xu","raw_affiliation_strings":["Meta AI, FAIR"],"affiliations":[{"raw_affiliation_string":"Meta AI, FAIR","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107896357","display_name":"B. Hashemi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bobak Hashemi","raw_affiliation_strings":["Meta AI, FAIR"],"affiliations":[{"raw_affiliation_string":"Meta AI, FAIR","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006414380","display_name":"Lucas Lehnert","orcid":"https://orcid.org/0000-0001-5897-499X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lucas Lehnert","raw_affiliation_strings":["Meta AI, FAIR"],"affiliations":[{"raw_affiliation_string":"Meta AI, FAIR","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086993787","display_name":"\u00dcr\u00fcn Do\u01e7an","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Urun Dogan","raw_affiliation_strings":["Meta AI, FAIR"],"affiliations":[{"raw_affiliation_string":"Meta AI, FAIR","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050765125","display_name":"Zheqing Zhu","orcid":"https://orcid.org/0000-0002-1162-106X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheqing Zhu","raw_affiliation_strings":["Meta AI, FAIR"],"affiliations":[{"raw_affiliation_string":"Meta AI, FAIR","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065423636","display_name":"Olivier Delalleau","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olivier Delalleau","raw_affiliation_strings":["Meta AI, FAIR"],"affiliations":[{"raw_affiliation_string":"Meta AI, FAIR","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5076794038"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6989,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.69941155,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"9154","last_page":"9160"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/model-predictive-control","display_name":"Model predictive control","score":0.8310062289237976},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6348241567611694},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5070302486419678},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.4422313868999481},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.28956568241119385}],"concepts":[{"id":"https://openalex.org/C172205157","wikidata":"https://www.wikidata.org/wiki/Q1782962","display_name":"Model predictive control","level":3,"score":0.8310062289237976},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6348241567611694},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5070302486419678},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.4422313868999481},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28956568241119385}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10611711","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611711","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W192920577","https://openalex.org/W1583837637","https://openalex.org/W2109910161","https://openalex.org/W2120346334","https://openalex.org/W2133105703","https://openalex.org/W2209913494","https://openalex.org/W2498991332","https://openalex.org/W2559655401","https://openalex.org/W2604382266","https://openalex.org/W2612690371","https://openalex.org/W2752796333","https://openalex.org/W2978455699","https://openalex.org/W2989847975","https://openalex.org/W2996449210","https://openalex.org/W3016525976","https://openalex.org/W3022566517","https://openalex.org/W3090369311","https://openalex.org/W3118210634","https://openalex.org/W3169291081","https://openalex.org/W4212774754","https://openalex.org/W4221164079","https://openalex.org/W4281550413","https://openalex.org/W4281856560","https://openalex.org/W4287126489","https://openalex.org/W4287689949","https://openalex.org/W4287756699","https://openalex.org/W4292947419","https://openalex.org/W4294435908","https://openalex.org/W4306818327","https://openalex.org/W4360584316","https://openalex.org/W4385245566","https://openalex.org/W6677737365","https://openalex.org/W6678474417","https://openalex.org/W6679518283","https://openalex.org/W6687045409","https://openalex.org/W6754184789","https://openalex.org/W6756256016","https://openalex.org/W6760439459","https://openalex.org/W6771656438","https://openalex.org/W6772008794","https://openalex.org/W6776438516","https://openalex.org/W6776601253","https://openalex.org/W6779265984","https://openalex.org/W6779656125","https://openalex.org/W6782145165","https://openalex.org/W6784712800","https://openalex.org/W6796289742","https://openalex.org/W6796589144","https://openalex.org/W6802659552","https://openalex.org/W6803519892","https://openalex.org/W6804244202","https://openalex.org/W6809990427","https://openalex.org/W6838729795","https://openalex.org/W6842475989","https://openalex.org/W6842654171","https://openalex.org/W6842971753","https://openalex.org/W6846142413","https://openalex.org/W6846345420"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W1990079087","https://openalex.org/W2390279801","https://openalex.org/W2101188133","https://openalex.org/W3202234113","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W4248731570","https://openalex.org/W2556120871"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"has":[4],"shown":[5],"great":[6],"promise":[7],"due":[8,42],"to":[9,43,103,131,138],"its":[10],"sample":[11],"efficiency,":[12],"but":[13],"still":[14],"struggles":[15],"with":[16,95,113,148],"long-horizon":[17],"sparse-reward":[18],"tasks,":[19,197],"especially":[20],"in":[21,39,53,109],"offline":[22,77,116,159,175],"settings":[23],"where":[24],"the":[25,59,84,166,174,192],"agent":[26],"learns":[27],"from":[28],"a":[29,44,54,107,110,120,126],"fixed":[30],"dataset.":[31],"We":[32,142],"hypothesize":[33],"that":[34,51,82,144],"model-based":[35,78],"RL":[36,79,117,160,176],"agents":[37],"struggle":[38],"these":[40],"environments":[41],"lack":[45],"of":[46,58,165],"long-term":[47],"planning":[48,52],"capabilities,":[49],"and":[50,99,181,194],"temporally":[55,127],"abstract":[56,128],"model":[57],"environment":[60],"can":[61],"alleviate":[62],"this":[63,66],"issue.":[64],"In":[65],"paper,":[67],"we":[68,74,101,124],"make":[69],"two":[70],"key":[71],"contributions:":[72],"1)":[73],"introduce":[75],"an":[76,153,202],"algorithm,":[80],"IQL-TD-MPC,":[81],"extends":[83],"state-":[85],"of-the-art":[86],"Temporal":[87],"Difference":[88],"Learning":[89],"for":[90],"Model":[91],"Predictive":[92],"Control":[93],"(TD-MPC)":[94],"Implicit":[96],"Q-Learning":[97],"(IQL);":[98],"2)":[100],"propose":[102],"use":[104],"IQL-TD-MPC":[105,129,154],"as":[106,119],"Manager":[108,130],"hierarchical":[111],"setting":[112],"any":[114],"off-the-shelf":[115,158],"algorithm":[118],"Worker.":[121],"More":[122],"specifically,":[123],"pre-train":[125],"predict":[132],"\"intent":[133],"embeddings\",":[134],"which":[135],"roughly":[136],"correspond":[137],"subgoals,":[139],"via":[140],"planning.":[141],"show":[143],"augmenting":[145],"state":[146],"representations":[147],"intent":[149],"embeddings":[150],"generated":[151],"by":[152],"manager":[155],"significantly":[156],"improves":[157],"agents'":[161],"performance":[162],"on":[163,191],"some":[164],"most":[167],"challenging":[168],"D4RL":[169],"benchmark":[170],"tasks.":[171],"For":[172],"instance,":[173],"algorithms":[177],"AWAC,":[178],"TD3-BC,":[179],"DT,":[180],"CQL":[182],"all":[183],"get":[184],"zero":[185],"or":[186],"near-zero":[187],"normalized":[188],"evaluation":[189],"scores":[190],"medium":[193],"large":[195],"antmaze":[196],"while":[198],"our":[199],"modification":[200],"gives":[201],"average":[203],"score":[204],"over":[205],"40.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
