{"id":"https://openalex.org/W4200575454","doi":"https://doi.org/10.1109/iros51168.2021.9636842","title":"Trajectory-based Split Hindsight Reverse Curriculum Learning","display_name":"Trajectory-based Split Hindsight Reverse Curriculum Learning","publication_year":2021,"publication_date":"2021-09-27","ids":{"openalex":"https://openalex.org/W4200575454","doi":"https://doi.org/10.1109/iros51168.2021.9636842"},"language":"en","primary_location":{"id":"doi:10.1109/iros51168.2021.9636842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros51168.2021.9636842","pdf_url":null,"source":{"id":"https://openalex.org/S4363607734","display_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100681762","display_name":"Jiaxi Wu","orcid":"https://orcid.org/0000-0002-6976-4784"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiaxi Wu","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012489739","display_name":"Dianmin Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dianmin Zhang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012120743","display_name":"Shanlin Zhong","orcid":"https://orcid.org/0000-0002-2132-064X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shanlin Zhong","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026688050","display_name":"Hong Qiao","orcid":"https://orcid.org/0000-0001-6384-3687"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210097554","display_name":"Center for Excellence in Brain Science and Intelligence Technology","ror":"https://ror.org/00vpwhm04","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210097554"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Qiao","raw_affiliation_strings":["Center for Excellence in Brain Science and Intelligence Technology, Institute of Neuroscience, Chinese Academy of Sciences, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Excellence in Brain Science and Intelligence Technology, Institute of Neuroscience, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210097554","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100681762"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210100255","https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16903882,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"37","issue":null,"first_page":"3971","last_page":"3978"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hindsight-bias","display_name":"Hindsight bias","score":0.9660297632217407},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7575867176055908},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7406168580055237},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.7111069560050964},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6568846702575684},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6394625902175903},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.592130720615387},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.48007091879844666},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4248017966747284},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14691337943077087},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12051218748092651},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09009373188018799}],"concepts":[{"id":"https://openalex.org/C10347200","wikidata":"https://www.wikidata.org/wiki/Q1960297","display_name":"Hindsight bias","level":2,"score":0.9660297632217407},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7575867176055908},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7406168580055237},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.7111069560050964},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6568846702575684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6394625902175903},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.592130720615387},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48007091879844666},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4248017966747284},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14691337943077087},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12051218748092651},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09009373188018799},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros51168.2021.9636842","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros51168.2021.9636842","pdf_url":null,"source":{"id":"https://openalex.org/S4363607734","display_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1575592356","https://openalex.org/W1771410628","https://openalex.org/W1931877416","https://openalex.org/W2100538121","https://openalex.org/W2132714442","https://openalex.org/W2158782408","https://openalex.org/W2296073425","https://openalex.org/W2605102758","https://openalex.org/W2616430965","https://openalex.org/W2736601468","https://openalex.org/W2741122588","https://openalex.org/W2767050701","https://openalex.org/W2775954438","https://openalex.org/W2787938642","https://openalex.org/W2810785043","https://openalex.org/W2946314455","https://openalex.org/W2962822210","https://openalex.org/W2963099939","https://openalex.org/W2963311874","https://openalex.org/W2963864421","https://openalex.org/W2968116426","https://openalex.org/W3101442004","https://openalex.org/W3105609823","https://openalex.org/W3157685993","https://openalex.org/W4289440819","https://openalex.org/W4293864724","https://openalex.org/W4297686387","https://openalex.org/W4297818513","https://openalex.org/W4300799055","https://openalex.org/W4302570325","https://openalex.org/W6634413486","https://openalex.org/W6638018090","https://openalex.org/W6640174482","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6737327832","https://openalex.org/W6737937804","https://openalex.org/W6740023956","https://openalex.org/W6740801417","https://openalex.org/W6741002519","https://openalex.org/W6741302124","https://openalex.org/W6742461812","https://openalex.org/W6746914971","https://openalex.org/W6748839928","https://openalex.org/W6752818329","https://openalex.org/W6753243525","https://openalex.org/W6755289019","https://openalex.org/W6780559895","https://openalex.org/W6786189056"],"related_works":["https://openalex.org/W3197854638","https://openalex.org/W3012552522","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886","https://openalex.org/W3009457412","https://openalex.org/W2992629954"],"abstract_inverted_index":{"Grasping":[0],"is":[1,32,47,138],"one":[2],"of":[3,17,58,80,96],"the":[4,15,45,56,84,89,93,97,104,115,126,135,144,169,174],"most":[5],"fundamental":[6],"problems":[7],"in":[8,25,109,120],"robotic":[9,27],"manipulation.":[10],"In":[11,160],"recent":[12],"years,":[13],"with":[14,178],"development":[16],"data-driven":[18],"methods,":[19],"reinforcement":[20,59],"learning":[21,60,82,90],"has":[22],"been":[23],"used":[24],"solving":[26],"grasping":[28,31,176],"problems.":[29],"However,":[30],"a":[33,68,110],"long-horizon":[34],"and":[35,92,117,163,172,182],"sparse":[36],"reward":[37,41],"task,":[38],"whose":[39],"natural":[40],"only":[42],"appears":[43],"when":[44],"task":[46],"successfully":[48],"achieved.":[49],"Therefore,":[50],"it":[51],"brings":[52],"great":[53],"challenges":[54],"to":[55,107,152,155],"deployment":[57],"methods.":[61],"To":[62],"tackle":[63],"this":[64],"difficulty,":[65],"we":[66],"propose":[67],"new":[69],"method":[70,79,167],"called":[71],"Trajectory-based":[72],"Split":[73],"Hindsight":[74],"Reverse":[75],"Curriculum":[76],"Learning.":[77],"This":[78],"reverse":[81],"from":[83],"goal":[85,116],"can":[86,190],"greatly":[87],"improve":[88],"efficiency":[91],"final":[94],"performance":[95],"tasks.":[98],"Specifically,":[99],"based":[100],"on":[101],"referred":[102],"trajectories,":[103],"agent":[105],"starts":[106],"learn":[108],"small":[111],"state":[112,122,128],"space":[113],"near":[114],"then":[118],"gradually":[119],"larger":[121],"spaces":[123],"until":[124],"covering":[125],"entire":[127],"space.":[129],"Through":[130],"split":[131],"hindsight":[132],"experience":[133],"replay,":[134],"sampled":[136],"trajectory":[137],"divided":[139],"into":[140],"segments":[141],"that":[142],"match":[143],"current":[145],"subspace's":[146],"size;":[147],"then,":[148],"they":[149],"are":[150],"modified":[151],"successful":[153],"trajectories":[154],"enable":[156],"more":[157],"efficient":[158],"learning.":[159],"both":[161],"simulation":[162],"real-world":[164],"experiments,":[165],"our":[166],"surpasses":[168],"existing":[170],"methods":[171],"achieves":[173],"goal-oriented":[175],"tasks":[177],"higher":[179],"success":[180],"rates":[181],"better":[183],"data":[184],"efficiencies.":[185],"The":[186],"detailed":[187],"experimental":[188],"results":[189],"be":[191],"viewed":[192],"at":[193],"https://youtu.be/7uNRzmRZhDk.":[194]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
