{"id":"https://openalex.org/W4285294350","doi":"https://doi.org/10.1109/tie.2022.3172754","title":"Solving Robotic Manipulation With Sparse Reward Reinforcement Learning Via Graph-Based Diversity and Proximity","display_name":"Solving Robotic Manipulation With Sparse Reward Reinforcement Learning Via Graph-Based Diversity and Proximity","publication_year":2022,"publication_date":"2022-05-11","ids":{"openalex":"https://openalex.org/W4285294350","doi":"https://doi.org/10.1109/tie.2022.3172754"},"language":"en","primary_location":{"id":"doi:10.1109/tie.2022.3172754","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tie.2022.3172754","pdf_url":null,"source":{"id":"https://openalex.org/S58031724","display_name":"IEEE Transactions on Industrial Electronics","issn_l":"0278-0046","issn":["0278-0046","1557-9948"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Industrial Electronics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://mediatum.ub.tum.de/1657383","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060444894","display_name":"Zhenshan Bing","orcid":"https://orcid.org/0000-0002-0896-2517"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Zhenshan Bing","raw_affiliation_strings":["Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018299384","display_name":"Hongkuan Zhou","orcid":"https://orcid.org/0000-0002-3665-9822"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hongkuan Zhou","raw_affiliation_strings":["Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100448563","display_name":"Rui Li","orcid":"https://orcid.org/0000-0002-8877-8524"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Li","raw_affiliation_strings":["School of Automation, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053561890","display_name":"Xiaojie Su","orcid":"https://orcid.org/0000-0003-1802-0264"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojie Su","raw_affiliation_strings":["School of Automation, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088159741","display_name":"Fabrice O. Morin","orcid":"https://orcid.org/0000-0003-0185-7420"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Fabrice O. Morin","raw_affiliation_strings":["Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768452","display_name":"Kai Huang","orcid":"https://orcid.org/0000-0003-0359-7810"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Huang","raw_affiliation_strings":["School of Computer Science, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063781430","display_name":"Alois Knoll","orcid":"https://orcid.org/0000-0003-4840-076X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alois Knoll","raw_affiliation_strings":["Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5060444894"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":5.378,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.96178236,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"70","issue":"3","first_page":"2759","last_page":"2769"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10731","display_name":"Educational Games and Gamification","score":0.9362000226974487,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10106","display_name":"Autism Spectrum Disorder Research","score":0.9175999760627747,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hindsight-bias","display_name":"Hindsight bias","score":0.9808680415153503},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6538363099098206},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5945277810096741},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.593406081199646},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5329892039299011},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5282482504844666},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5202828049659729},{"id":"https://openalex.org/keywords/curriculum","display_name":"Curriculum","score":0.5066304802894592},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.4280214011669159},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.22073474526405334},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.17794209718704224},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.08871090412139893}],"concepts":[{"id":"https://openalex.org/C10347200","wikidata":"https://www.wikidata.org/wiki/Q1960297","display_name":"Hindsight bias","level":2,"score":0.9808680415153503},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6538363099098206},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5945277810096741},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.593406081199646},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5329892039299011},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5282482504844666},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5202828049659729},{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.5066304802894592},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.4280214011669159},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.22073474526405334},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.17794209718704224},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.08871090412139893},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tie.2022.3172754","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tie.2022.3172754","pdf_url":null,"source":{"id":"https://openalex.org/S58031724","display_name":"IEEE Transactions on Industrial Electronics","issn_l":"0278-0046","issn":["0278-0046","1557-9948"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Industrial Electronics","raw_type":"journal-article"},{"id":"pmh:oai:mediatum.ub.tum.de:node/1657383","is_oa":true,"landing_page_url":"https://mediatum.ub.tum.de/1657383","pdf_url":null,"source":{"id":"https://openalex.org/S4377196330","display_name":"mediaTUM  (Technical University of Munich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I62916508","host_organization_name":"Technical University of Munich","host_organization_lineage":["https://openalex.org/I62916508"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:mediatum.ub.tum.de:node/1657383","is_oa":true,"landing_page_url":"https://mediatum.ub.tum.de/1657383","pdf_url":null,"source":{"id":"https://openalex.org/S4377196330","display_name":"mediaTUM  (Technical University of Munich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I62916508","host_organization_name":"Technical University of Munich","host_organization_lineage":["https://openalex.org/I62916508"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6483959550","display_name":null,"funder_award_id":"945539","funder_id":"https://openalex.org/F4320338336","funder_display_name":"H2020 Future and Emerging Technologies"}],"funders":[{"id":"https://openalex.org/F4320338336","display_name":"H2020 Future and Emerging Technologies","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W2139612737","https://openalex.org/W2169528473","https://openalex.org/W2201581102","https://openalex.org/W2616430965","https://openalex.org/W2744921630","https://openalex.org/W2786917922","https://openalex.org/W2946723315","https://openalex.org/W2963311874","https://openalex.org/W2963523627","https://openalex.org/W2964097858","https://openalex.org/W2969456553","https://openalex.org/W2970393539","https://openalex.org/W2970948392","https://openalex.org/W2978938326","https://openalex.org/W2982432464","https://openalex.org/W3008970887","https://openalex.org/W3100789280","https://openalex.org/W3109467707","https://openalex.org/W3117515147","https://openalex.org/W3174508330","https://openalex.org/W3175584021","https://openalex.org/W3179972165","https://openalex.org/W3209777177","https://openalex.org/W4289440819","https://openalex.org/W4297795006","https://openalex.org/W4297809330","https://openalex.org/W4300799055","https://openalex.org/W6687681856","https://openalex.org/W6737937804","https://openalex.org/W6740801417","https://openalex.org/W6741302124","https://openalex.org/W6742667875","https://openalex.org/W6750007414","https://openalex.org/W6755289019","https://openalex.org/W6762640273","https://openalex.org/W6762868464","https://openalex.org/W6763356705","https://openalex.org/W6766694020","https://openalex.org/W6767047803","https://openalex.org/W6767246565","https://openalex.org/W6774967489"],"related_works":["https://openalex.org/W3197854638","https://openalex.org/W3140454661","https://openalex.org/W4245029315","https://openalex.org/W1492315459","https://openalex.org/W1512434910","https://openalex.org/W2540910169","https://openalex.org/W3148904318","https://openalex.org/W2139970489","https://openalex.org/W2022803902","https://openalex.org/W2105474389"],"abstract_inverted_index":{"In":[0,89],"multigoal":[1],"reinforcement":[2],"learning":[3],"(RL),":[4],"algorithms":[5],"usually":[6],"suffer":[7],"from":[8],"inefficiency":[9],"in":[10,16,41,120,127,133,137],"the":[11,23,38,110],"collection":[12],"of":[13,25,64,99,112],"successful":[14],"experiences":[15],"tasks":[17,73,124],"with":[18,74],"sparse":[19],"rewards.":[20],"By":[21],"utilizing":[22],"ideas":[24],"relabeling":[26],"hindsight":[27,47,51,107],"experience":[28,48],"and":[29,58,77,101,115,130,141,151],"curriculum":[30],"learning,":[31],"some":[32],"prior":[33,146],"works":[34,104,147],"have":[35],"greatly":[36],"improved":[37],"sample":[39,139],"efficiency":[40,140],"robotic":[42],"manipulation":[43,72,123],"tasks,":[44],"such":[45],"as":[46],"replay":[49],"(HER),":[50],"goal":[52],"generation":[53],"(HGG),":[54],"graph-based":[55,113],"HGG":[56,95],"(G-HGG),":[57],"curriculum-guided":[59],"HER":[60],"(CHER).":[61],"However,":[62],"none":[63],"these":[65],"can":[66,153],"learn":[67],"efficiently":[68],"to":[69],"solve":[70],"challenging":[71,122],"distant":[75],"goals":[76,108],"obstacles,":[78],"since":[79],"they":[80],"rely":[81],"either":[82],"on":[83,109],"heuristic":[84],"or":[85],"simple":[86],"distance-guided":[87],"exploration.":[88],"this":[90,157],"article,":[91],"we":[92],"introduce":[93],"graph-curriculum-guided":[94],"(GC-HGG),":[96],"an":[97],"extension":[98],"CHER":[100],"G-HGG,":[102],"which":[103,134],"by":[105],"selecting":[106],"basis":[111],"proximity":[114],"diversity.":[116],"We":[117],"evaluated":[118],"GC-HGG":[119],"four":[121],"involving":[125],"obstacles":[126],"both":[128,138],"simulations":[129],"real-world":[131],"experiments,":[132],"significant":[135],"enhancements":[136],"overall":[142],"success":[143],"rates":[144],"over":[145],"were":[148],"demonstrated.":[149],"Videos":[150],"codes":[152],"be":[154],"viewed":[155],"at":[156],"link:":[158],"<uri":[159],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[160],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://videoviewsite.wixsite.com/gc-hgg</uri>":[161],".":[162]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
