{"id":"https://openalex.org/W3157584903","doi":"https://doi.org/10.1109/icra48506.2021.9561402","title":"DisCo RL: Distribution-Conditioned Reinforcement Learning for General-Purpose Policies","display_name":"DisCo RL: Distribution-Conditioned Reinforcement Learning for General-Purpose Policies","publication_year":2021,"publication_date":"2021-05-30","ids":{"openalex":"https://openalex.org/W3157584903","doi":"https://doi.org/10.1109/icra48506.2021.9561402","mag":"3157584903"},"language":"en","primary_location":{"id":"doi:10.1109/icra48506.2021.9561402","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561402","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.11707","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022444244","display_name":"Soroush Nasiriany","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Soroush Nasiriany","raw_affiliation_strings":["University of California, Berkeley","University of California\u2013Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California\u2013Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001658549","display_name":"Vitchyr H. Pong","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vitchyr H. Pong","raw_affiliation_strings":["University of California, Berkeley","University of California\u2013Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California\u2013Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021472921","display_name":"Ashvin Nair","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashvin Nair","raw_affiliation_strings":["University of California, Berkeley","University of California\u2013Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California\u2013Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020537133","display_name":"Alexander Khazatsky","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Khazatsky","raw_affiliation_strings":["University of California, Berkeley","University of California\u2013Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California\u2013Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045351810","display_name":"Glen Berseth","orcid":"https://orcid.org/0000-0001-7351-8028"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Glen Berseth","raw_affiliation_strings":["University of California, Berkeley","University of California\u2013Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California\u2013Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["University of California, Berkeley","University of California\u2013Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California\u2013Berkeley","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4198,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.68233955,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"2","issue":null,"first_page":"6635","last_page":"6641"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.860124409198761},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7894451022148132},{"id":"https://openalex.org/keywords/learnability","display_name":"Learnability","score":0.7553980946540833},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7551875114440918},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.6525298953056335},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5808838605880737},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5805907845497131},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5374539494514465},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5317358374595642},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4927199184894562},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4774525761604309},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45810437202453613},{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.4475022554397583},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1313563883304596}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.860124409198761},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7894451022148132},{"id":"https://openalex.org/C2777723229","wikidata":"https://www.wikidata.org/wiki/Q4367921","display_name":"Learnability","level":2,"score":0.7553980946540833},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7551875114440918},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.6525298953056335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5808838605880737},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5805907845497131},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5374539494514465},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5317358374595642},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4927199184894562},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4774525761604309},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45810437202453613},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.4475022554397583},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1313563883304596},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icra48506.2021.9561402","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561402","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.11707","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.11707","pdf_url":"https://arxiv.org/pdf/2104.11707","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3157584903","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2104.11707","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2104.11707","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.11707","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.11707","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.11707","pdf_url":"https://arxiv.org/pdf/2104.11707","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3157584903.pdf","grobid_xml":"https://content.openalex.org/works/W3157584903.grobid-xml"},"referenced_works_count":84,"referenced_works":["https://openalex.org/W195596278","https://openalex.org/W567721252","https://openalex.org/W1594201624","https://openalex.org/W1934021597","https://openalex.org/W1959608418","https://openalex.org/W1982948368","https://openalex.org/W2121863487","https://openalex.org/W2145339207","https://openalex.org/W2409550820","https://openalex.org/W2417089653","https://openalex.org/W2423557781","https://openalex.org/W2733961795","https://openalex.org/W2769112066","https://openalex.org/W2774343553","https://openalex.org/W2789008106","https://openalex.org/W2803281228","https://openalex.org/W2804673281","https://openalex.org/W2810132790","https://openalex.org/W2884247313","https://openalex.org/W2893841966","https://openalex.org/W2903327785","https://openalex.org/W2908470496","https://openalex.org/W2914261249","https://openalex.org/W2949604932","https://openalex.org/W2953326529","https://openalex.org/W2953382159","https://openalex.org/W2955848960","https://openalex.org/W2962717849","https://openalex.org/W2962787969","https://openalex.org/W2962897886","https://openalex.org/W2962902376","https://openalex.org/W2963003097","https://openalex.org/W2963139417","https://openalex.org/W2963201535","https://openalex.org/W2963293881","https://openalex.org/W2963511511","https://openalex.org/W2963516265","https://openalex.org/W2963584407","https://openalex.org/W2963636093","https://openalex.org/W2963802910","https://openalex.org/W2964001908","https://openalex.org/W2964021598","https://openalex.org/W2964036701","https://openalex.org/W2964342357","https://openalex.org/W2970948392","https://openalex.org/W2984701531","https://openalex.org/W3032077725","https://openalex.org/W3035608172","https://openalex.org/W3100358394","https://openalex.org/W4214717370","https://openalex.org/W6608012839","https://openalex.org/W6616173779","https://openalex.org/W6635701881","https://openalex.org/W6639732818","https://openalex.org/W6640231202","https://openalex.org/W6640963894","https://openalex.org/W6714644935","https://openalex.org/W6716653466","https://openalex.org/W6718190810","https://openalex.org/W6737937804","https://openalex.org/W6740801417","https://openalex.org/W6745747580","https://openalex.org/W6746722632","https://openalex.org/W6747473740","https://openalex.org/W6748012927","https://openalex.org/W6748599296","https://openalex.org/W6751526458","https://openalex.org/W6752089545","https://openalex.org/W6752187413","https://openalex.org/W6752338937","https://openalex.org/W6752910514","https://openalex.org/W6753060773","https://openalex.org/W6753770476","https://openalex.org/W6754966139","https://openalex.org/W6756685538","https://openalex.org/W6758978475","https://openalex.org/W6760560886","https://openalex.org/W6762868464","https://openalex.org/W6763356705","https://openalex.org/W6765240361","https://openalex.org/W6769166761","https://openalex.org/W6769552686","https://openalex.org/W6774967489","https://openalex.org/W6779366341"],"related_works":["https://openalex.org/W3206552777","https://openalex.org/W2954667745","https://openalex.org/W3080901109","https://openalex.org/W2991616621","https://openalex.org/W3131034247","https://openalex.org/W2951823923","https://openalex.org/W3095548673","https://openalex.org/W2158641818","https://openalex.org/W2294805292","https://openalex.org/W1744884320","https://openalex.org/W2914584948","https://openalex.org/W2790924949","https://openalex.org/W2966613492","https://openalex.org/W3016622599","https://openalex.org/W2114451917","https://openalex.org/W2964342357","https://openalex.org/W2513373085","https://openalex.org/W3092485320","https://openalex.org/W3202944960","https://openalex.org/W2580068393"],"abstract_inverted_index":{"Can":[0],"we":[1,70],"use":[2],"reinforcement":[3,130],"learning":[4,131],"to":[5,48,117,134,163],"learn":[6,136],"general-purpose":[7],"policies":[8,25,53],"that":[9,63,93,152,160],"can":[10,95],"perform":[11],"a":[12,75,144],"wide":[13],"range":[14],"of":[15,34,40,112,146],"different":[16],"tasks,":[17],"resulting":[18],"in":[19,29,90],"flexible":[20],"and":[21,42,77,121,150],"reusable":[22],"skills?":[23],"Contextual":[24],"provide":[26],"this":[27,68],"capability":[28],"principle,":[30],"but":[31,58],"the":[32,35,38,91,109],"representation":[33,81],"context":[36],"determines":[37],"degree":[39],"generalization":[41,47,162],"expressivity.":[43],"Categorical":[44],"contexts":[45],"preclude":[46],"entirely":[49],"new":[50,164],"tasks.":[51],"Goal-conditioned":[52],"may":[54],"enable":[55],"some":[56],"generalization,":[57],"cannot":[59],"capture":[60],"all":[61],"tasks":[62,149,159],"might":[64],"be":[65],"desired.":[66],"In":[67],"paper,":[69],"propose":[71],"goal":[72,165],"distributions":[73,87],"as":[74],"general":[76,89],"broadly":[78],"applicable":[79],"task":[80],"suitable":[82],"for":[83],"contextual":[84],"policies.":[85,138],"Goal":[86],"are":[88],"sense":[92],"they":[94],"represent":[96],"any":[97],"state-based":[98],"reward":[99],"function":[100],"when":[101],"equipped":[102],"with":[103],"an":[104,125],"appropriate":[105],"distribution":[106,113],"class,":[107],"while":[108],"particular":[110],"choice":[111],"class":[114],"allows":[115],"us":[116],"trade":[118],"off":[119],"expressivity":[120],"learnability.":[122],"We":[123,139],"develop":[124],"off-policy":[126],"algorithm":[127],"called":[128],"distribution-conditioned":[129],"(DisCo":[132],"RL)":[133],"efficiently":[135],"these":[137],"evaluate":[140],"DisCo":[141],"RL":[142],"on":[143,158],"variety":[145],"robot":[147],"manipulation":[148],"find":[151],"it":[153],"significantly":[154],"outperforms":[155],"prior":[156],"methods":[157],"require":[161],"distributions.":[166]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
