{"id":"https://openalex.org/W4399601459","doi":"https://doi.org/10.1109/tai.2024.3413692","title":"Self-Supervised Exploration via Temporal Inconsistency in Reinforcement Learning","display_name":"Self-Supervised Exploration via Temporal Inconsistency in Reinforcement Learning","publication_year":2024,"publication_date":"2024-06-13","ids":{"openalex":"https://openalex.org/W4399601459","doi":"https://doi.org/10.1109/tai.2024.3413692"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2024.3413692","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2024.3413692","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016262505","display_name":"Zijian Gao","orcid":"https://orcid.org/0000-0001-5151-3381"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zijian Gao","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China","School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013340793","display_name":"Kele Xu","orcid":"https://orcid.org/0000-0001-5997-5169"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kele Xu","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China","School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073132517","display_name":"Yuanzhao Zhai","orcid":"https://orcid.org/0000-0003-1385-0074"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanzhao Zhai","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China","School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088885490","display_name":"Bo Ding","orcid":"https://orcid.org/0000-0002-1236-8318"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Ding","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China","School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039795290","display_name":"Dawei Feng","orcid":"https://orcid.org/0000-0002-7587-8905"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Feng","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China","School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083124500","display_name":"Xinjun Mao","orcid":"https://orcid.org/0000-0001-6003-5748"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinjun Mao","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China","School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076087924","display_name":"Huaimin Wang","orcid":"https://orcid.org/0000-0003-0376-640X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaimin Wang","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, China","School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology and State Key Laboratory of Complex &#x0026; Critical Software Environment, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5016262505"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.7252,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.74276171,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"5","issue":"11","first_page":"5530","last_page":"5539"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.5450000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.5450000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5070000290870667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.48570001125335693,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7361773252487183},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5878994464874268},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.48335927724838257},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40956851840019226},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3311607241630554},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2982240617275238},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.0955110490322113}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7361773252487183},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5878994464874268},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48335927724838257},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40956851840019226},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3311607241630554},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2982240617275238},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0955110490322113}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2024.3413692","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2024.3413692","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W1534477342","https://openalex.org/W2012833704","https://openalex.org/W2020920737","https://openalex.org/W2098877457","https://openalex.org/W2101524054","https://openalex.org/W2103104224","https://openalex.org/W2116459397","https://openalex.org/W2118550318","https://openalex.org/W2131600418","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2170899200","https://openalex.org/W2489939061","https://openalex.org/W2739512435","https://openalex.org/W2761873684","https://openalex.org/W2786917922","https://openalex.org/W2803790180","https://openalex.org/W2805127664","https://openalex.org/W2963359646","https://openalex.org/W2963523627","https://openalex.org/W3002765113","https://openalex.org/W3035576098","https://openalex.org/W3036619998","https://openalex.org/W3097646105","https://openalex.org/W3181648466","https://openalex.org/W3208238113","https://openalex.org/W4234435152","https://openalex.org/W4386157387","https://openalex.org/W4387408992","https://openalex.org/W4387431619","https://openalex.org/W4391939531","https://openalex.org/W6622487243","https://openalex.org/W6677671969","https://openalex.org/W6683436435","https://openalex.org/W6684921986","https://openalex.org/W6717230150","https://openalex.org/W6726216181","https://openalex.org/W6735033012","https://openalex.org/W6735506055","https://openalex.org/W6741002519","https://openalex.org/W6742667875","https://openalex.org/W6748603076","https://openalex.org/W6753925943","https://openalex.org/W6755728437","https://openalex.org/W6756303580","https://openalex.org/W6762863188","https://openalex.org/W6765240361","https://openalex.org/W6780328113","https://openalex.org/W6780470247","https://openalex.org/W6780562256","https://openalex.org/W6787713516","https://openalex.org/W6791000347","https://openalex.org/W6791194670","https://openalex.org/W6796667233","https://openalex.org/W6803067813","https://openalex.org/W6843809954","https://openalex.org/W6845469943"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588"],"abstract_inverted_index":{"In":[0],"sparse":[1],"extrinsic":[2],"reward":[3,27,49,171],"settings,":[4],"reinforcement":[5,174],"learning":[6,54],"remains":[7],"a":[8,46,71,103],"challenge":[9],"despite":[10],"increasing":[11],"interest":[12],"in":[13,154],"this":[14],"field.":[15],"Existing":[16],"approaches":[17],"suggest":[18],"that":[19,150],"intrinsic":[20,48,98,155],"rewards":[21,156],"can":[22,157],"alleviate":[23],"issues":[24],"caused":[25],"by":[26,52,60],"sparsity.":[28],"However,":[29],"many":[30],"studies":[31],"overlook":[32],"the":[33,79,84,89,112,115,127],"critical":[34],"role":[35],"of":[36,78,129],"temporal":[37,90,152],"information,":[38],"essential":[39],"for":[40,173],"human":[41,53],"curiosity.":[42],"This":[43],"article":[44],"introduces":[45],"novel":[47],"mechanism":[50,106],"inspired":[51],"processes,":[55],"where":[56],"curiosity":[57],"is":[58],"evaluated":[59],"comparing":[61],"current":[62],"observations":[63],"with":[64],"historical":[65],"knowledge.":[66],"Our":[67,147],"method":[68],"involves":[69],"training":[70,140],"self-supervised":[72],"prediction":[73],"model,":[74],"periodically":[75],"saving":[76],"snapshots":[77,96],"model":[80],"parameters,":[81],"and":[82,118,142,169],"employing":[83],"nuclear":[85],"norm":[86],"to":[87,107,111,165],"assess":[88],"inconsistency":[91],"between":[92],"predictions":[93],"from":[94],"different":[95],"as":[97],"rewards.":[99],"Additionally,":[100],"we":[101],"propose":[102],"variational":[104],"weighting":[105],"adaptively":[108],"assign":[109],"weights":[110],"snapshots,":[113],"enhancing":[114],"model's":[116],"robustness":[117],"performance.":[119],"Experimental":[120],"results":[121],"across":[122],"various":[123],"benchmark":[124],"environments":[125],"demonstrate":[126],"efficacy":[128],"our":[130],"approach,":[131],"which":[132],"outperforms":[133],"other":[134],"state-of-the-art":[135],"methods":[136],"without":[137],"incurring":[138],"additional":[139],"costs":[141],"exhibits":[143],"higher":[144],"noise":[145],"tolerance.":[146],"findings":[148],"indicate":[149],"leveraging":[151],"information":[153],"significantly":[158],"improve":[159],"exploration":[160],"performance,":[161],"motivating":[162],"future":[163],"research":[164],"develop":[166],"more":[167],"robust":[168],"accurate":[170],"systems":[172],"learning.":[175]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
