{"id":"https://openalex.org/W4381233049","doi":"https://doi.org/10.1109/lra.2023.3287362","title":"Goal-Conditioned Reinforcement Learning With Disentanglement-Based Reachability Planning","display_name":"Goal-Conditioned Reinforcement Learning With Disentanglement-Based Reachability Planning","publication_year":2023,"publication_date":"2023-06-19","ids":{"openalex":"https://openalex.org/W4381233049","doi":"https://doi.org/10.1109/lra.2023.3287362"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2023.3287362","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2023.3287362","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021183991","display_name":"Zhifeng Qian","orcid":"https://orcid.org/0000-0001-9724-6928"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhifeng Qian","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010299064","display_name":"Mingyu You","orcid":"https://orcid.org/0000-0003-2758-167X"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyu You","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101789136","display_name":"Hongjun Zhou","orcid":"https://orcid.org/0000-0002-6256-2485"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongjun Zhou","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005456562","display_name":"Xuanhui Xu","orcid":"https://orcid.org/0000-0003-0394-8713"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanhui Xu","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049557511","display_name":"Bin He","orcid":"https://orcid.org/0000-0003-3193-6269"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin He","raw_affiliation_strings":["College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Frontiers Science Center for Intelligent Autonomous Systems, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5021183991"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":1.0438,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.80919084,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"8","issue":"8","first_page":"4721","last_page":"4728"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reachability","display_name":"Reachability","score":0.8016074895858765},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7799469232559204},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7328990697860718},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.664318323135376},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5875465869903564},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5407509803771973},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5228234529495239},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5135531425476074},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4879876971244812},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.4642632007598877},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4299715757369995},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.41159236431121826},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.40508395433425903},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3526495099067688},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.12164047360420227},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0902428925037384},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.07983073592185974},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07517370581626892},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.06841975450515747}],"concepts":[{"id":"https://openalex.org/C136643341","wikidata":"https://www.wikidata.org/wiki/Q1361526","display_name":"Reachability","level":2,"score":0.8016074895858765},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7799469232559204},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7328990697860718},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.664318323135376},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5875465869903564},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5407509803771973},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5228234529495239},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5135531425476074},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4879876971244812},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.4642632007598877},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4299715757369995},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.41159236431121826},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.40508395433425903},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3526495099067688},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.12164047360420227},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0902428925037384},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.07983073592185974},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07517370581626892},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.06841975450515747},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2023.3287362","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2023.3287362","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.49000000953674316,"display_name":"Reduced inequalities"},{"id":"https://metadata.un.org/sdg/16","score":0.4099999964237213,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G2701178210","display_name":null,"funder_award_id":"62073244","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W603908379","https://openalex.org/W1594201624","https://openalex.org/W1959608418","https://openalex.org/W2132083787","https://openalex.org/W2158782408","https://openalex.org/W2187089797","https://openalex.org/W2787938642","https://openalex.org/W2823112946","https://openalex.org/W2895453875","https://openalex.org/W2949475445","https://openalex.org/W2963523627","https://openalex.org/W2964036701","https://openalex.org/W2964067469","https://openalex.org/W2999417551","https://openalex.org/W3006036127","https://openalex.org/W3088304681","https://openalex.org/W3104898494","https://openalex.org/W3109943994","https://openalex.org/W3115293622","https://openalex.org/W3173049816","https://openalex.org/W3201368126","https://openalex.org/W3205239453","https://openalex.org/W3207837114","https://openalex.org/W3210048194","https://openalex.org/W4206684559","https://openalex.org/W4285009743","https://openalex.org/W4285601030","https://openalex.org/W4288021424","https://openalex.org/W4288331462","https://openalex.org/W4300799055","https://openalex.org/W4305029953","https://openalex.org/W4308261669","https://openalex.org/W4312891162","https://openalex.org/W6618372016","https://openalex.org/W6635701881","https://openalex.org/W6640963894","https://openalex.org/W6730641667","https://openalex.org/W6740801417","https://openalex.org/W6748012927","https://openalex.org/W6748839928","https://openalex.org/W6753060773","https://openalex.org/W6754957883","https://openalex.org/W6756303580","https://openalex.org/W6764173040","https://openalex.org/W6767649332","https://openalex.org/W6772619266","https://openalex.org/W6773320568","https://openalex.org/W6775634482","https://openalex.org/W6782766965","https://openalex.org/W6786234212","https://openalex.org/W6786644593","https://openalex.org/W6800564222","https://openalex.org/W6803597800","https://openalex.org/W6803644249","https://openalex.org/W6846317354"],"related_works":["https://openalex.org/W2136512912","https://openalex.org/W2127267268","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W149700981","https://openalex.org/W2513478072","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886"],"abstract_inverted_index":{"Goal-Conditioned":[0],"Reinforcement":[1],"Learning":[2],"(GCRL)":[3],"can":[4],"enable":[5],"agents":[6],"to":[7,12,47,51,65,74,83,85,90,138,152,179,191],"spontaneously":[8],"set":[9,15],"diverse":[10],"goals":[11,28],"learn":[13,153],"a":[14,34,71,128,145,167],"of":[16,78,118,184,195],"skills.":[17],"Despite":[18],"the":[19,76,114,124,181,193,221],"excellent":[20],"works":[21,39],"proposed":[22,151],"in":[23,29,107,166,204,225],"various":[24],"fields,":[25],"reaching":[26],"distant":[27],"temporally":[30,140,227],"extended":[31,141,228],"tasks":[32,208],"remains":[33],"challenge":[35],"for":[36,198],"GCRL.":[37,53],"Current":[38],"tackled":[40],"this":[41],"problem":[42],"by":[43],"leveraging":[44],"planning":[45,119],"algorithms":[46],"plan":[48],"intermediate":[49],"subgoals":[50],"augment":[52],"Their":[54],"methods":[55,224],"need":[56],"two":[57],"crucial":[58],"requirements:":[59],"(i)":[60],"A":[61,170],"state":[62,87],"representation":[63],"space":[64,88],"search":[66],"valid":[67],"subgoals,":[68],"and":[69,116,120,160,209],"(ii)":[70],"distance":[72,110,183],"function":[73],"measure":[75],"reachability":[77],"subgoals.":[79,185],"However,":[80],"they":[81,95],"struggle":[82],"scale":[84],"high-dimensional":[86,164],"due":[89],"their":[91],"non-compact":[92],"representations.":[93],"Moreover,":[94,186],"cannot":[96],"collect":[97],"high-quality":[98],"training":[99],"data":[100],"through":[101],"standard":[102],"GC":[103],"policies,":[104],"which":[105,156],"results":[106],"an":[108],"inaccurate":[109],"function.":[111],"Both":[112],"affect":[113],"efficiency":[115],"performance":[117],"policy":[121],"learning.":[122],"In":[123,143],"letter,":[125],"we":[126],"propose":[127],"goal-conditioned":[129],"RL":[130],"algorithm":[131],"combined":[132],"with":[133],"Disentanglement-based":[134],"Reachability":[135,172],"Planning":[136],"(REPlan)":[137],"solve":[139],"tasks.":[142,229],"REPlan,":[144],"Disentangled":[146],"Representation":[147],"Module":[148,174],"(DRM)":[149],"is":[150,176],"compact":[154],"representations":[155],"disentangle":[157],"robot":[158],"poses":[159],"object":[161],"positions":[162],"from":[163],"observations":[165],"self-supervised":[168],"manner.":[169],"simple":[171],"Discrimination":[173],"(REM)":[175],"also":[177],"designed":[178],"determine":[180],"temporal":[182],"REM":[187],"computes":[188],"intrinsic":[189],"bonuses":[190],"encourage":[192],"collection":[194],"novel":[196],"states":[197],"training.":[199],"We":[200],"evaluate":[201],"our":[202,217],"REPlan":[203,218],"three":[205],"vision-based":[206],"simulation":[207],"one":[210],"real-world":[211],"task.":[212],"The":[213],"experiments":[214],"demonstrate":[215],"that":[216],"significantly":[219],"outperforms":[220],"prior":[222],"state-of-the-art":[223],"solving":[226]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
