{"id":"https://openalex.org/W4387546333","doi":"https://doi.org/10.1109/tai.2023.3323628","title":"Nuclear Norm Maximization-Based Curiosity-Driven Reinforcement Learning","display_name":"Nuclear Norm Maximization-Based Curiosity-Driven Reinforcement Learning","publication_year":2023,"publication_date":"2023-10-11","ids":{"openalex":"https://openalex.org/W4387546333","doi":"https://doi.org/10.1109/tai.2023.3323628"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2023.3323628","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2023.3323628","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114911427","display_name":"Chao Chen","orcid":"https://orcid.org/0000-0003-4649-4061"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chao Chen","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073132517","display_name":"Yuanzhao Zhai","orcid":"https://orcid.org/0000-0003-1385-0074"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanzhao Zhai","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016262505","display_name":"Zijian Gao","orcid":"https://orcid.org/0000-0001-5151-3381"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zijian Gao","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013340793","display_name":"Kele Xu","orcid":"https://orcid.org/0000-0001-5997-5169"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kele Xu","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002854172","display_name":"Sen Yang","orcid":"https://orcid.org/0000-0003-3222-2268"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sen Yang","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102770021","display_name":"Yiying Li","orcid":"https://orcid.org/0000-0002-2632-5175"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiying Li","raw_affiliation_strings":["Artificial Intelligence Research Center, DII, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Research Center, DII, Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088885490","display_name":"Bo Ding","orcid":"https://orcid.org/0000-0002-1236-8318"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Ding","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039795290","display_name":"Dawei Feng","orcid":"https://orcid.org/0000-0002-7587-8905"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Feng","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101522100","display_name":"Huaimin Wang","orcid":"https://orcid.org/0000-0002-3245-1901"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaimin Wang","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5114911427"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":1.5734,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.86618956,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"5","issue":"5","first_page":"2410","last_page":"2421"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9613999724388123,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.8179299235343933},{"id":"https://openalex.org/keywords/curiosity","display_name":"Curiosity","score":0.7246309518814087},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7056782841682434},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6581948399543762},{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.6110231876373291},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5936090350151062},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.49030259251594543},{"id":"https://openalex.org/keywords/norm","display_name":"Norm (philosophy)","score":0.48233839869499207},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.46385928988456726},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.2808990776538849},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22036227583885193},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14198365807533264},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08581748604774475}],"concepts":[{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.8179299235343933},{"id":"https://openalex.org/C33435437","wikidata":"https://www.wikidata.org/wiki/Q366791","display_name":"Curiosity","level":2,"score":0.7246309518814087},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7056782841682434},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6581948399543762},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.6110231876373291},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5936090350151062},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49030259251594543},{"id":"https://openalex.org/C191795146","wikidata":"https://www.wikidata.org/wiki/Q3878446","display_name":"Norm (philosophy)","level":2,"score":0.48233839869499207},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.46385928988456726},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2808990776538849},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22036227583885193},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14198365807533264},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08581748604774475},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2023.3323628","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2023.3323628","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.550000011920929}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":105,"referenced_works":["https://openalex.org/W172298727","https://openalex.org/W779494576","https://openalex.org/W1693986406","https://openalex.org/W2009551863","https://openalex.org/W2028513945","https://openalex.org/W2045377163","https://openalex.org/W2101524054","https://openalex.org/W2103104224","https://openalex.org/W2108114251","https://openalex.org/W2131792768","https://openalex.org/W2138996475","https://openalex.org/W2139612737","https://openalex.org/W2145339207","https://openalex.org/W2149101915","https://openalex.org/W2150468603","https://openalex.org/W2161966552","https://openalex.org/W2165395308","https://openalex.org/W2167065813","https://openalex.org/W2170899200","https://openalex.org/W2489939061","https://openalex.org/W2736601468","https://openalex.org/W2761873684","https://openalex.org/W2766447205","https://openalex.org/W2790385355","https://openalex.org/W2791797404","https://openalex.org/W2794187674","https://openalex.org/W2805127664","https://openalex.org/W2895453875","https://openalex.org/W2901836533","https://openalex.org/W2918882492","https://openalex.org/W2922388521","https://openalex.org/W2953326529","https://openalex.org/W2963262099","https://openalex.org/W2963276097","https://openalex.org/W2963359646","https://openalex.org/W2963523627","https://openalex.org/W2963761387","https://openalex.org/W2964505566","https://openalex.org/W2969456553","https://openalex.org/W2970272688","https://openalex.org/W2970392982","https://openalex.org/W2973229164","https://openalex.org/W2982316857","https://openalex.org/W2991562719","https://openalex.org/W3035576098","https://openalex.org/W3036619998","https://openalex.org/W3041394538","https://openalex.org/W3041795964","https://openalex.org/W3094490555","https://openalex.org/W3097646105","https://openalex.org/W3100789280","https://openalex.org/W3122081773","https://openalex.org/W3136301269","https://openalex.org/W3161747711","https://openalex.org/W3162654685","https://openalex.org/W3199364882","https://openalex.org/W3208238113","https://openalex.org/W3211360571","https://openalex.org/W3214229832","https://openalex.org/W4214717370","https://openalex.org/W4220747123","https://openalex.org/W4221154005","https://openalex.org/W4287167165","https://openalex.org/W4287631529","https://openalex.org/W4287660295","https://openalex.org/W4306679387","https://openalex.org/W6622487243","https://openalex.org/W6637591494","https://openalex.org/W6676576766","https://openalex.org/W6683603353","https://openalex.org/W6684295950","https://openalex.org/W6686227676","https://openalex.org/W6703271639","https://openalex.org/W6717230150","https://openalex.org/W6735033012","https://openalex.org/W6741002519","https://openalex.org/W6747473740","https://openalex.org/W6748523217","https://openalex.org/W6753925943","https://openalex.org/W6754957883","https://openalex.org/W6756303580","https://openalex.org/W6760518690","https://openalex.org/W6762319895","https://openalex.org/W6762863188","https://openalex.org/W6762868464","https://openalex.org/W6764988152","https://openalex.org/W6765240361","https://openalex.org/W6766694020","https://openalex.org/W6767151588","https://openalex.org/W6767820044","https://openalex.org/W6771807793","https://openalex.org/W6773246137","https://openalex.org/W6780470247","https://openalex.org/W6780562256","https://openalex.org/W6785350144","https://openalex.org/W6787713516","https://openalex.org/W6791000347","https://openalex.org/W6791194670","https://openalex.org/W6795637536","https://openalex.org/W6796667233","https://openalex.org/W6803067813","https://openalex.org/W6803709789","https://openalex.org/W6804177976","https://openalex.org/W6809799041","https://openalex.org/W7075998887"],"related_works":["https://openalex.org/W3094054656","https://openalex.org/W4285676344","https://openalex.org/W4382584175","https://openalex.org/W2123270665","https://openalex.org/W2060310955","https://openalex.org/W2284924956","https://openalex.org/W3039898216","https://openalex.org/W2185422427","https://openalex.org/W2478680874","https://openalex.org/W874708728"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,87],"(RL)":[2],"has":[3],"achieved":[4],"promising":[5],"results":[6],"in":[7,37,158],"solving":[8],"numerous":[9],"challenging":[10],"sequential":[11],"decision":[12],"problems.":[13],"To":[14],"address":[15],"the":[16,28,38,57,64,95,110,140,143,194],"issue":[17],"of":[18,40,59,67,142,184,189,196],"sparse":[19],"extrinsic":[20],"rewards,":[21,26,178],"researchers":[22],"have":[23],"proposed":[24],"intrinsic":[25,48,70,177,201],"enabling":[27],"agent":[29],"to":[30,55,63,80,86,104,120,169],"acquire":[31],"skills":[32],"that":[33,137,163],"may":[34],"prove":[35],"valuable":[36],"pursuit":[39],"future":[41],"rewards.":[42,202],"One":[43],"representative":[44],"approach":[45],"for":[46,151],"generating":[47],"rewards":[49,71],"involves":[50],"constructing":[51],"a":[52,133,148,181,187],"predictive":[53],"model":[54],"assess":[56],"novelty":[58,141],"states.":[60],"However,":[61],"due":[62],"stochastic":[65],"nature":[66],"complex":[68],"environments,":[69],"can":[72,83],"be":[73,84],"noisy.":[74],"Directly":[75],"employing":[76],"noisy":[77],"forward":[78],"predictions":[79],"supervise":[81],"policies":[82],"detrimental":[85],"performance":[88,167,195],"and":[89,153],"efficiency.":[90],"Many":[91],"recent":[92],"studies":[93],"utilize":[94],"<inline-formula":[96],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[97],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[98],"notation=\"LaTeX\">$\\ell":[99],"_{2}$</tex-math></inline-formula>":[100],"norm":[101],"or":[102],"variance":[103],"measure":[105],"novelty,":[106],"which":[107],"further":[108],"amplifies":[109],"noise":[111,152],"through":[112],"squaring":[113],"operations.":[114],"In":[115],"this":[116],"paper,":[117],"we":[118,131],"aim":[119],"tackle":[121],"these":[122],"challenges":[123],"by":[124],"leveraging":[125],"Nuclear":[126],"Norm":[127],"Maximization":[128],"(NNM).":[129],"Specifically,":[130],"propose":[132],"novel":[134],"curiosity":[135],"reward":[136],"accurately":[138],"quantifies":[139],"exploration":[144],"environment":[145],"while":[146],"exhibiting":[147],"high":[149],"tolerance":[150],"outliers.":[154],"Our":[155],"extensive":[156],"experiments":[157],"various":[159],"benchmark":[160],"environments":[161],"demonstrate":[162],"NNM":[164,179],"achieves":[165,180],"state-of-the-art":[166],"compared":[168],"previous":[170],"curiosity-based":[171],"methods.":[172],"When":[173],"trained":[174],"solely":[175],"with":[176],"human-normalized":[182],"score":[183],"1.09":[185],"on":[186,199],"subset":[188],"26":[190],"Atari":[191],"games,":[192],"twice":[193],"methods":[197],"based":[198],"competitive":[200]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
