{"id":"https://openalex.org/W2792404612","doi":"https://doi.org/10.1109/icra.2018.8460756","title":"Composable Deep Reinforcement Learning for Robotic Manipulation","display_name":"Composable Deep Reinforcement Learning for Robotic Manipulation","publication_year":2018,"publication_date":"2018-05-01","ids":{"openalex":"https://openalex.org/W2792404612","doi":"https://doi.org/10.1109/icra.2018.8460756","mag":"2792404612"},"language":"en","primary_location":{"id":"doi:10.1109/icra.2018.8460756","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2018.8460756","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1803.06773","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030073861","display_name":"Tuomas Haarnoja","orcid":"https://orcid.org/0009-0007-2973-9246"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tuomas Haarnoja","raw_affiliation_strings":["Berkeley Artificial Intelligence Research, Berkeley, UC"],"affiliations":[{"raw_affiliation_string":"Berkeley Artificial Intelligence Research, Berkeley, UC","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001658549","display_name":"Vitchyr H. Pong","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vitchyr Pong","raw_affiliation_strings":["Berkeley Artificial Intelligence Research, Berkeley, UC"],"affiliations":[{"raw_affiliation_string":"Berkeley Artificial Intelligence Research, Berkeley, UC","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070399587","display_name":"Aurick Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aurick Zhou","raw_affiliation_strings":["Berkeley Artificial Intelligence Research, Berkeley, UC"],"affiliations":[{"raw_affiliation_string":"Berkeley Artificial Intelligence Research, Berkeley, UC","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060660082","display_name":"Murtaza Dalal","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Murtaza Dalal","raw_affiliation_strings":["Berkeley Artificial Intelligence Research, Berkeley, UC"],"affiliations":[{"raw_affiliation_string":"Berkeley Artificial Intelligence Research, Berkeley, UC","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049349154","display_name":"Pieter Abbeel","orcid":null},"institutions":[{"id":"https://openalex.org/I4210161460","display_name":"OpenAI (United States)","ror":"https://ror.org/05wx9n238","country_code":"US","type":"company","lineage":["https://openalex.org/I4210161460"]},{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pieter Abbeel","raw_affiliation_strings":["Open AI","Berkeley Artificial Intelligence Research, Berkeley, UC"],"affiliations":[{"raw_affiliation_string":"Open AI","institution_ids":["https://openalex.org/I4210161460"]},{"raw_affiliation_string":"Berkeley Artificial Intelligence Research, Berkeley, UC","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["Berkeley Artificial Intelligence Research, Berkeley, UC"],"affiliations":[{"raw_affiliation_string":"Berkeley Artificial Intelligence Research, Berkeley, UC","institution_ids":["https://openalex.org/I1343180700"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5030073861"],"corresponding_institution_ids":["https://openalex.org/I1343180700"],"apc_list":null,"apc_paid":null,"fwci":4.3874,"has_fulltext":true,"cited_by_count":38,"citation_normalized_percentile":{"value":0.95434139,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6244","last_page":"6251"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9671000242233276,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9053090810775757},{"id":"https://openalex.org/keywords/principle-of-compositionality","display_name":"Principle of compositionality","score":0.7711816430091858},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7531651854515076},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6681874990463257},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.5854955911636353},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4602590501308441},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4392056167125702},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.43274474143981934},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4104728698730469}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9053090810775757},{"id":"https://openalex.org/C121375916","wikidata":"https://www.wikidata.org/wiki/Q936559","display_name":"Principle of compositionality","level":2,"score":0.7711816430091858},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7531651854515076},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6681874990463257},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.5854955911636353},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4602590501308441},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4392056167125702},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.43274474143981934},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4104728698730469},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icra.2018.8460756","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2018.8460756","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1803.06773","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1803.06773","pdf_url":"https://arxiv.org/pdf/1803.06773","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.1803.06773","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1803.06773","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:2792404612","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1803.06773","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1803.06773","pdf_url":"https://arxiv.org/pdf/1803.06773","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.4300000071525574,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2792404612.pdf","grobid_xml":"https://content.openalex.org/works/W2792404612.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W2026659355","https://openalex.org/W2042882799","https://openalex.org/W2098774185","https://openalex.org/W2107464055","https://openalex.org/W2121615981","https://openalex.org/W2125612430","https://openalex.org/W2127107099","https://openalex.org/W2129515556","https://openalex.org/W2136719407","https://openalex.org/W2145060720","https://openalex.org/W2155772159","https://openalex.org/W2158782408","https://openalex.org/W2161395589","https://openalex.org/W2161905760","https://openalex.org/W2173248099","https://openalex.org/W2296360731","https://openalex.org/W2426267443","https://openalex.org/W2443711627","https://openalex.org/W2511837229","https://openalex.org/W2557046619","https://openalex.org/W2565902248","https://openalex.org/W2566467060","https://openalex.org/W2575705757","https://openalex.org/W2592538810","https://openalex.org/W2593044849","https://openalex.org/W2605102758","https://openalex.org/W2609650878","https://openalex.org/W2725320964","https://openalex.org/W2727576081","https://openalex.org/W2733961795","https://openalex.org/W2741122588","https://openalex.org/W2781726626","https://openalex.org/W2949561945","https://openalex.org/W2950471160","https://openalex.org/W2950848044","https://openalex.org/W2952606116","https://openalex.org/W2963267001","https://openalex.org/W2964161785","https://openalex.org/W4255064568","https://openalex.org/W6674884181","https://openalex.org/W6676139019","https://openalex.org/W6678157427","https://openalex.org/W6681439324","https://openalex.org/W6682708815","https://openalex.org/W6683845664","https://openalex.org/W6696324988","https://openalex.org/W6713603661","https://openalex.org/W6725708968","https://openalex.org/W6734206676","https://openalex.org/W6738940082","https://openalex.org/W6740801417"],"related_works":["https://openalex.org/W2963403593","https://openalex.org/W2949561945","https://openalex.org/W2781726626","https://openalex.org/W2964161785","https://openalex.org/W1757796397","https://openalex.org/W2959488596","https://openalex.org/W2964043796","https://openalex.org/W2949608212","https://openalex.org/W2736601468","https://openalex.org/W2257979135","https://openalex.org/W2173248099","https://openalex.org/W2158782408","https://openalex.org/W2145339207","https://openalex.org/W3130354689","https://openalex.org/W2963339188","https://openalex.org/W3040408533","https://openalex.org/W3037039089","https://openalex.org/W3206132287","https://openalex.org/W2787824708","https://openalex.org/W3028830971"],"abstract_inverted_index":{"Model-free":[0],"deep":[1,182],"reinforcement":[2,183],"learning":[3,95,184],"has":[4],"been":[5],"shown":[6],"to":[7,17,28,65,74,114],"exhibit":[8],"good":[9],"performance":[10],"in":[11,129,161],"domains":[12],"ranging":[13],"from":[14,165],"video":[15],"games":[16],"simulated":[18,194],"robotic":[19,47,67],"manipulation":[20,76],"and":[21,118,186,195],"locomotion.":[22],"However,":[23],"model-free":[24,181],"methods":[25],"are":[26],"known":[27],"perform":[29],"poorly":[30],"when":[31],"the":[32,36,42,120,123,132,135],"interaction":[33],"time":[34],"with":[35,108],"environment":[37],"is":[38,41,77,174],"limited,":[39],"as":[40],"case":[43],"for":[44,145,192],"most":[45],"real-world":[46,66,75,146,196],"tasks.":[48,197],"In":[49],"this":[50,72],"paper,":[51],"we":[52,103],"study":[53],"how":[54],"maximum":[55],"entropy":[56],"policies":[57,96,106,151],"trained":[58],"using":[59],"soft":[60,84,87,109,172],"Q-learning":[61,88,110,173],"can":[62,89,111,126,156,189],"be":[63,112,127,190],"applied":[64],"manipulation.":[68],"The":[69],"application":[70],"of":[71,83,122,131],"method":[73],"facilitated":[78],"by":[79,94,98,152],"two":[80],"important":[81],"features":[82],"Q-learning.":[85],"First,":[86],"learn":[90],"multimodal":[91],"exploration":[92],"strategies":[93],"represented":[97],"expressive":[99],"energy-based":[100],"models.":[101],"Second,":[102],"show":[104],"that":[105,119,171,187],"learned":[107],"composed":[113,136],"create":[115],"new":[116,150],"policies,":[117],"optimality":[121],"resulting":[124],"policy":[125],"bounded":[128],"terms":[130],"divergence":[133],"between":[134],"policies.":[137],"This":[138],"compositionality":[139,188],"provides":[140],"an":[141],"especially":[142],"valuable":[143],"tool":[144],"manipulation,":[147],"where":[148],"constructing":[149],"composing":[153],"existing":[154],"skills":[155],"provide":[157],"a":[158],"large":[159],"gain":[160],"efficiency":[162],"over":[163],"training":[164],"scratch.":[166],"Our":[167],"experimental":[168],"evaluation":[169],"demonstrates":[170],"substantially":[175],"more":[176],"sample":[177],"efficient":[178],"than":[179],"prior":[180],"methods,":[185],"performed":[191],"both":[193]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":10},{"year":2018,"cited_by_count":6}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
