{"id":"https://openalex.org/W3034962946","doi":"https://doi.org/10.24963/ijcai.2020/290","title":"Potential Driven Reinforcement Learning for Hard Exploration Tasks","display_name":"Potential Driven Reinforcement Learning for Hard Exploration Tasks","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3034962946","doi":"https://doi.org/10.24963/ijcai.2020/290","mag":"3034962946"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2020/290","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/290","pdf_url":"https://www.ijcai.org/proceedings/2020/0290.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2020/0290.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055979604","display_name":"Enmin Zhao","orcid":"https://orcid.org/0000-0001-6117-5080"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Enmin Zhao","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences\uff1bSchool of Artificial Intelligence, University of Chinese Academy of Sciences","Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences\uff1bSchool of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108126327","display_name":"Shihong Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shihong Deng","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049451354","display_name":"Yifan Zang","orcid":"https://orcid.org/0000-0003-4537-384X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Zang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences\uff1bSchool of Artificial Intelligence, University of Chinese Academy of Sciences","Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences\uff1bSchool of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080641582","display_name":"Yongxin Kang","orcid":"https://orcid.org/0000-0002-0468-9234"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongxin Kang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences; Institute of Automation, Chinese Academy of Sciences","Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences; Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100400000","display_name":"Kai Li","orcid":"https://orcid.org/0000-0003-3840-3270"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Li","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076090670","display_name":"Junliang Xing","orcid":"https://orcid.org/0000-0001-6801-0510"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junliang Xing","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences; School of Artificial Intelligence, University of Chinese Academy of Sciences","Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences; School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5055979604"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.6628,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.75473855,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2096","last_page":"2102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9853000044822693,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8462062478065491},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6987907290458679},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6728609204292297},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.509795606136322},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3898788094520569},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1261991262435913}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8462062478065491},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6987907290458679},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6728609204292297},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.509795606136322},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3898788094520569},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1261991262435913},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C548081761","wikidata":"https://www.wikidata.org/wiki/Q180388","display_name":"Waste management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2020/290","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/290","pdf_url":"https://www.ijcai.org/proceedings/2020/0290.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2020/290","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/290","pdf_url":"https://www.ijcai.org/proceedings/2020/0290.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3034962946.pdf","grobid_xml":"https://content.openalex.org/works/W3034962946.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W779494576","https://openalex.org/W1988526405","https://openalex.org/W2103120971","https://openalex.org/W2110144538","https://openalex.org/W2145339207","https://openalex.org/W2201581102","https://openalex.org/W2612610049","https://openalex.org/W2614839826","https://openalex.org/W2736601468","https://openalex.org/W2765308067","https://openalex.org/W2788862220","https://openalex.org/W2804380964","https://openalex.org/W2805560727","https://openalex.org/W2895453875","https://openalex.org/W2895626374","https://openalex.org/W2949475445","https://openalex.org/W2953100042","https://openalex.org/W2963095800","https://openalex.org/W2963099939","https://openalex.org/W2963276097","https://openalex.org/W2963376229","https://openalex.org/W2963477884","https://openalex.org/W2963523627","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2964062135","https://openalex.org/W2964067469","https://openalex.org/W2970948392","https://openalex.org/W4289440819","https://openalex.org/W4295837424","https://openalex.org/W4298857171","https://openalex.org/W4300198501","https://openalex.org/W4300799055"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2280422768","https://openalex.org/W3143197806","https://openalex.org/W4252555497","https://openalex.org/W3121175838","https://openalex.org/W3016293053","https://openalex.org/W1690653314","https://openalex.org/W2401723157"],"abstract_inverted_index":{"Experience":[0,77],"replay":[1,51,73,107],"plays":[2],"a":[3,81,98],"crucial":[4],"role":[5],"in":[6,62,89,105],"Reinforcement":[7],"Learning":[8],"(RL),":[9],"enabling":[10],"the":[11,19,59,67,110,135],"agent":[12,111],"to":[13,54,112],"remember":[14],"and":[15,45,74,83,108,117,141,153],"reuse":[16],"experience":[17,25,50,72,106],"from":[18,42,114],"past.":[20],"Most":[21],"previous":[22],"methods":[23],"sample":[24],"transitions":[26],"using":[27,120],"simple":[28],"heuristics":[29],"like":[30],"uniformly":[31],"sampling":[32,85],"or":[33],"prioritizing":[34],"those":[35],"good":[36,44,116],"ones.":[37],"Since":[38],"humans":[39],"can":[40,125],"learn":[41,113],"both":[43,115],"bad":[46,118],"experiences,":[47],"more":[48],"sophisticated":[49],"algorithms":[52,131],"need":[53],"be":[55,126],"developed.":[56],"Inspired":[57],"by":[58],"potential":[60,69,99],"energy":[61,100],"physics,":[63],"this":[64],"work":[65],"introduces":[66],"artificial":[68],"field":[70],"into":[71],"develops":[75],"Potentialized":[76],"Replay":[78],"(PotER)":[79],"as":[80,132,134],"new":[82],"effective":[84],"algorithm":[86],"for":[87,102],"RL":[88,130],"hard":[90,146],"exploration":[91,147],"tasks":[92],"with":[93,128],"sparse":[94],"rewards.":[95],"PotER":[96,124],"defines":[97],"function":[101],"each":[103],"state":[104,122],"helps":[109],"experiences":[119],"intrinsic":[121],"supervision.":[123],"combined":[127],"different":[129],"well":[133],"self-imitation":[136],"learning":[137],"algorithm.":[138],"Experimental":[139],"analyses":[140],"comparisons":[142],"on":[143],"multiple":[144],"challenging":[145],"environments":[148],"have":[149],"verified":[150],"its":[151],"effectiveness":[152],"efficiency.":[154]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
