{"id":"https://openalex.org/W4385488897","doi":"https://doi.org/10.1109/ijcnn54540.2023.10192041","title":"Pseudo Value Network Distillation for High-Performance Exploration","display_name":"Pseudo Value Network Distillation for High-Performance Exploration","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385488897","doi":"https://doi.org/10.1109/ijcnn54540.2023.10192041"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn54540.2023.10192041","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn54540.2023.10192041","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055979604","display_name":"Enmin Zhao","orcid":"https://orcid.org/0000-0001-6117-5080"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Enmin Zhao","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences","School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076090670","display_name":"Junliang Xing","orcid":"https://orcid.org/0000-0001-6801-0510"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junliang Xing","raw_affiliation_strings":["Tsinghua University,Department of Computer Science and Technology,Beijing,China","Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Computer Science and Technology,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100400000","display_name":"Kai Li","orcid":"https://orcid.org/0000-0003-3840-3270"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Li","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080641582","display_name":"Yongxin Kang","orcid":"https://orcid.org/0000-0002-0468-9234"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongxin Kang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences","School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090751520","display_name":"Pin Tao","orcid":"https://orcid.org/0000-0003-2687-7997"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Pin","raw_affiliation_strings":["Tsinghua University,Department of Computer Science and Technology,Beijing,China","Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Computer Science and Technology,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055979604"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.1748,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54478384,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"518","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9754999876022339,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11685","display_name":"Zebrafish Biomedical Research Applications","score":0.9742000102996826,"subfield":{"id":"https://openalex.org/subfields/1307","display_name":"Cell Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/curiosity","display_name":"Curiosity","score":0.7880246639251709},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7326521873474121},{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.7253154516220093},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6923889517784119},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6366553902626038},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.563066840171814},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4722009599208832},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4258801341056824},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2907312512397766},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.13547492027282715},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08220729231834412}],"concepts":[{"id":"https://openalex.org/C33435437","wikidata":"https://www.wikidata.org/wiki/Q366791","display_name":"Curiosity","level":2,"score":0.7880246639251709},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7326521873474121},{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.7253154516220093},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6923889517784119},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6366553902626038},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.563066840171814},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4722009599208832},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4258801341056824},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2907312512397766},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.13547492027282715},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08220729231834412},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn54540.2023.10192041","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn54540.2023.10192041","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":73,"referenced_works":["https://openalex.org/W1850531616","https://openalex.org/W1931877416","https://openalex.org/W2113023245","https://openalex.org/W2145339207","https://openalex.org/W2417786368","https://openalex.org/W2596982695","https://openalex.org/W2736601468","https://openalex.org/W2787236409","https://openalex.org/W2788862220","https://openalex.org/W2803616302","https://openalex.org/W2804380964","https://openalex.org/W2949475445","https://openalex.org/W2950735232","https://openalex.org/W2950794298","https://openalex.org/W2953100042","https://openalex.org/W2962730405","https://openalex.org/W2963262099","https://openalex.org/W2963276097","https://openalex.org/W2963277051","https://openalex.org/W2963359646","https://openalex.org/W2963523627","https://openalex.org/W2964043796","https://openalex.org/W2964067469","https://openalex.org/W2964983108","https://openalex.org/W2969456553","https://openalex.org/W2970948392","https://openalex.org/W2997289589","https://openalex.org/W3018036994","https://openalex.org/W3129322645","https://openalex.org/W3175662487","https://openalex.org/W3175870124","https://openalex.org/W4221164780","https://openalex.org/W4225963367","https://openalex.org/W4287867830","https://openalex.org/W4289440819","https://openalex.org/W4295837424","https://openalex.org/W4297789121","https://openalex.org/W4297804636","https://openalex.org/W4300198501","https://openalex.org/W4300799055","https://openalex.org/W4306176261","https://openalex.org/W6639056794","https://openalex.org/W6640174482","https://openalex.org/W6676728370","https://openalex.org/W6685757253","https://openalex.org/W6692846177","https://openalex.org/W6703271639","https://openalex.org/W6716474083","https://openalex.org/W6717230150","https://openalex.org/W6718092244","https://openalex.org/W6730641667","https://openalex.org/W6734205138","https://openalex.org/W6735033012","https://openalex.org/W6735865323","https://openalex.org/W6740801417","https://openalex.org/W6740836278","https://openalex.org/W6741002519","https://openalex.org/W6748271565","https://openalex.org/W6748321296","https://openalex.org/W6748972340","https://openalex.org/W6751285671","https://openalex.org/W6751540476","https://openalex.org/W6751955673","https://openalex.org/W6755289019","https://openalex.org/W6756303580","https://openalex.org/W6758641611","https://openalex.org/W6763356705","https://openalex.org/W6766271513","https://openalex.org/W6766694020","https://openalex.org/W6771807793","https://openalex.org/W6809531794","https://openalex.org/W6810671056","https://openalex.org/W6846255677"],"related_works":["https://openalex.org/W3094054656","https://openalex.org/W4285676344","https://openalex.org/W2123270665","https://openalex.org/W4382584175","https://openalex.org/W2060310955","https://openalex.org/W2284924956","https://openalex.org/W3043413210","https://openalex.org/W3039898216","https://openalex.org/W2185422427","https://openalex.org/W2478680874"],"abstract_inverted_index":{"Solving":[0],"hard":[1,172],"exploration":[2,125,173],"tasks":[3],"with":[4,68],"sparse":[5,174],"rewards":[6,146],"is":[7],"notoriously":[8],"challenging":[9],"in":[10,53,117,170],"reinforcement":[11],"learning":[12],"(RL),":[13],"which":[14],"needs":[15],"to":[16,42,65,92,108,113,127,147,162],"address":[17],"two":[18],"key":[19],"issues":[20],"simultaneously:":[21],"exploiting":[22],"past":[23,78,121],"successful":[24,38],"experiences":[25,39,80,122],"and":[26,40,98,102,123,156,181],"exploring":[27],"the":[28,63,94,114,128,164],"unknown":[29],"environment.":[30],"Many":[31],"prior":[32],"works":[33],"take":[34],"expert":[35],"demonstrations":[36,48],"as":[37],"learn":[41],"imitate":[43],"them":[44],"directly.":[45],"However,":[46],"these":[47],"are":[49],"often":[50],"not":[51],"available":[52],"practice.":[54],"Recently,":[55],"curiosity-driven":[56],"RL":[57,95],"methods":[58],"provide":[59],"intrinsic":[60,145],"rewards,":[61],"encouraging":[62],"agent":[64],"explore":[66],"states":[67,116,130],"high":[69,110,124],"novelty.":[70],"Nonetheless,":[71],"they":[72],"lack":[73],"a":[74,85],"mechanism":[75],"for":[76,160],"leveraging":[77],"good":[79],"effectively.":[81],"This":[82],"work":[83],"presents":[84],"Pseudo":[86],"Value":[87],"Network":[88],"Distillation":[89],"(PVND)":[90],"framework":[91],"balance":[93],"agent's":[96],"exploitative":[97],"exploratory":[99],"behaviors":[100],"effectively":[101],"automatically.":[103],"In":[104],"particular,":[105],"PVND":[106,141,152],"learns":[107],"set":[109],"exploitation":[111],"bonuses":[112,126],"critical":[115,149,157],"rewarded":[118],"trajectories":[119],"from":[120],"novel":[129],"that":[131,140],"agents":[132,161],"rarely":[133],"visit":[134],"during":[135],"exploration.":[136],"We":[137],"theoretically":[138],"demonstrate":[139],"gives":[142],"larger":[143],"positive":[144],"more":[148],"states.":[150],"Furthermore,":[151],"automatically":[153],"finds":[154],"meaningful":[155],"hierarchical":[158],"sub-tasks":[159],"accomplish":[163],"final":[165],"goal":[166],"progressively.":[167],"Competitive":[168],"results":[169],"several":[171],"reward":[175],"problems":[176],"have":[177],"verified":[178],"its":[179],"effectiveness":[180],"efficiency.":[182]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
