{"id":"https://openalex.org/W4206684559","doi":"https://doi.org/10.1109/lra.2022.3141148","title":"Weakly Supervised Disentangled Representation for Goal-Conditioned Reinforcement Learning","display_name":"Weakly Supervised Disentangled Representation for Goal-Conditioned Reinforcement Learning","publication_year":2022,"publication_date":"2022-01-11","ids":{"openalex":"https://openalex.org/W4206684559","doi":"https://doi.org/10.1109/lra.2022.3141148"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2022.3141148","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2022.3141148","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2202.13624","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021183991","display_name":"Zhifeng Qian","orcid":"https://orcid.org/0000-0001-9724-6928"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhifeng Qian","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010299064","display_name":"Mingyu You","orcid":"https://orcid.org/0000-0003-2758-167X"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyu You","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101789136","display_name":"Hongjun Zhou","orcid":"https://orcid.org/0000-0002-6256-2485"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongjun Zhou","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049557511","display_name":"Bin He","orcid":"https://orcid.org/0000-0003-3193-6269"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin He","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5021183991"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":0.9653,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.78675976,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":"7","issue":"2","first_page":"2202","last_page":"2209"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8178790807723999},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7754514217376709},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6866528987884521},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6398569345474243},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6099144816398621},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.5910599827766418},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5438641905784607},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5324764251708984},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5317519903182983},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5057647228240967},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4882807731628418},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.47461119294166565},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.421112596988678},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.18638616800308228},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1818399429321289}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8178790807723999},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7754514217376709},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6866528987884521},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6398569345474243},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6099144816398621},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.5910599827766418},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5438641905784607},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5324764251708984},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5317519903182983},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5057647228240967},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4882807731628418},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.47461119294166565},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.421112596988678},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.18638616800308228},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1818399429321289},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lra.2022.3141148","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2022.3141148","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2202.13624","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.13624","pdf_url":"https://arxiv.org/pdf/2202.13624","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2202.13624","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.13624","pdf_url":"https://arxiv.org/pdf/2202.13624","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2701178210","display_name":null,"funder_award_id":"62073244","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W567721252","https://openalex.org/W603908379","https://openalex.org/W1901129140","https://openalex.org/W1959608418","https://openalex.org/W2035866593","https://openalex.org/W2145339207","https://openalex.org/W2187089797","https://openalex.org/W2257979135","https://openalex.org/W2575671312","https://openalex.org/W2752796333","https://openalex.org/W2765407302","https://openalex.org/W2766447205","https://openalex.org/W2781726626","https://openalex.org/W2804205394","https://openalex.org/W2823112946","https://openalex.org/W2884585870","https://openalex.org/W2955368974","https://openalex.org/W2962785568","https://openalex.org/W2963799213","https://openalex.org/W2989847975","https://openalex.org/W3001312527","https://openalex.org/W3003410126","https://openalex.org/W3010515602","https://openalex.org/W3032077725","https://openalex.org/W3091703975","https://openalex.org/W3103213775","https://openalex.org/W3104898494","https://openalex.org/W3115293622","https://openalex.org/W3118210634","https://openalex.org/W3125947392","https://openalex.org/W3128270024","https://openalex.org/W3129559590","https://openalex.org/W3157685993","https://openalex.org/W3176179930","https://openalex.org/W3207837114","https://openalex.org/W4300799055","https://openalex.org/W6616173779","https://openalex.org/W6618372016","https://openalex.org/W6640963894","https://openalex.org/W6740801417","https://openalex.org/W6747473740","https://openalex.org/W6751836845","https://openalex.org/W6753060773","https://openalex.org/W6765456200","https://openalex.org/W6769166761","https://openalex.org/W6775634482","https://openalex.org/W6779827379","https://openalex.org/W6782984102","https://openalex.org/W6786234212","https://openalex.org/W6790554731"],"related_works":["https://openalex.org/W2983142544","https://openalex.org/W2891059443","https://openalex.org/W4281663961","https://openalex.org/W3208888551","https://openalex.org/W4313561566","https://openalex.org/W3208386644","https://openalex.org/W4389832810","https://openalex.org/W4220682630","https://openalex.org/W3181622257","https://openalex.org/W3163146846"],"abstract_inverted_index":{"Goal-conditioned":[0,69],"reinforcement":[1],"learning":[2,18,49,67],"is":[3,40,143,167],"a":[4,19,24,47,74,80],"crucial":[5],"yet":[6],"challenging":[7],"algorithm":[8],"which":[9,39,93,142],"enables":[10],"agents":[11,125],"to":[12,54,85,97,105,120,126,170,172],"achieve":[13],"multiple":[14],"user-specified":[15],"goals":[16,123],"when":[17],"set":[20],"of":[21,32,109,132],"skills":[22],"in":[23,92,158],"dynamic":[25],"environment.":[26],"However,":[27],"it":[28],"typically":[29],"requires":[30],"millions":[31],"the":[33,43,56,64,106,110,118,133,139,155,173],"environmental":[34],"interactions":[35],"explored":[36],"by":[37,62],"agents,":[38],"sample-inefficient.":[41],"In":[42,73,164],"letter,":[44],"we":[45,78],"propose":[46,79],"skill":[48],"framework":[50],"DR-GRL":[51,152,166],"that":[52,151],"aims":[53],"improve":[55],"sample":[57,159],"efficiency":[58,160],"and":[59,68,89,116,161],"policy":[60,162],"generalization":[61],"combining":[63],"Disentangled":[65],"Representation":[66],"visual":[70],"Reinforcement":[71],"Learning.":[72],"weakly":[75],"supervised":[76],"manner,":[77],"Spatial":[81],"Transform":[82],"AutoEncoder":[83],"(STAE)":[84],"learn":[86],"an":[87],"interpretable":[88],"controllable":[90],"representation":[91,135],"different":[94,98],"parts":[95],"correspond":[96],"object":[99],"attributes":[100],"(shape,":[101],"color,":[102],"position).":[103],"Due":[104],"high":[107],"controllability":[108],"representations,":[111],"STAE":[112],"can":[113],"simply":[114],"recombine":[115],"recode":[117],"representations":[119],"generate":[121],"unseen":[122],"for":[124,145],"practice":[127],"themselves.":[128],"The":[129],"manifold":[130],"structure":[131],"learned":[134],"maintains":[136],"consistency":[137],"with":[138],"physical":[140],"position,":[141],"beneficial":[144],"reward":[146],"calculation.":[147],"We":[148],"empirically":[149],"demonstrate":[150],"significantly":[153],"outperforms":[154],"previous":[156],"methods":[157],"generalization.":[163],"addition,":[165],"also":[168],"easy":[169],"expand":[171],"real":[174],"robot.":[175]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
