{"id":"https://openalex.org/W4319165238","doi":"https://doi.org/10.1109/tnnls.2023.3236361","title":"Exploration in Deep Reinforcement Learning: From Single-Agent to Multiagent Domain","display_name":"Exploration in Deep Reinforcement Learning: From Single-Agent to Multiagent Domain","publication_year":2023,"publication_date":"2023-01-19","ids":{"openalex":"https://openalex.org/W4319165238","doi":"https://doi.org/10.1109/tnnls.2023.3236361","pmid":"https://pubmed.ncbi.nlm.nih.gov/37021882"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3236361","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3236361","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047509839","display_name":"Jianye Hao","orcid":"https://orcid.org/0000-0002-0422-8235"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianye Hao","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101619771","display_name":"Tianpei Yang","orcid":"https://orcid.org/0000-0002-5497-7146"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianpei Yang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103176048","display_name":"Hongyao Tang","orcid":"https://orcid.org/0000-0001-7478-7684"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyao Tang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044788927","display_name":"Chenjia Bai","orcid":"https://orcid.org/0000-0002-8379-9385"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenjia Bai","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100766553","display_name":"Jinyi Liu","orcid":"https://orcid.org/0000-0002-4537-348X"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinyi Liu","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005604730","display_name":"Zhaopeng Meng","orcid":"https://orcid.org/0000-0001-6019-5952"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaopeng Meng","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100346747","display_name":"Peng Liu","orcid":"https://orcid.org/0000-0001-6568-1335"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Liu","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100422377","display_name":"Zhen Wang","orcid":"https://orcid.org/0000-0002-8182-2852"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen Wang","raw_affiliation_strings":["School of Artificial Intelligence, OPtics and Electronics (iOPEN) and the School of Cyberspace, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, OPtics and Electronics (iOPEN) and the School of Cyberspace, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5047509839"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":26.3289,"has_fulltext":false,"cited_by_count":153,"citation_normalized_percentile":{"value":0.99736938,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"35","issue":"7","first_page":"8762","last_page":"8782"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7563384175300598},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.583249032497406},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5236879587173462},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.4858371913433075},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4440891444683075},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.44359642267227173},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2293337881565094},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.07654789090156555},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.04817444086074829}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7563384175300598},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.583249032497406},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5236879587173462},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.4858371913433075},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4440891444683075},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.44359642267227173},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2293337881565094},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.07654789090156555},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.04817444086074829},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3236361","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3236361","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:37021882","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37021882","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5400000214576721,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G192882225","display_name":null,"funder_award_id":"U22B2036","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3794980888","display_name":null,"funder_award_id":"11931015","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8921145797","display_name":null,"funder_award_id":"U1836214","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":217,"referenced_works":["https://openalex.org/W582134693","https://openalex.org/W779494576","https://openalex.org/W1542595278","https://openalex.org/W1560021816","https://openalex.org/W1579979603","https://openalex.org/W1582436621","https://openalex.org/W1771410628","https://openalex.org/W1777239053","https://openalex.org/W1786044565","https://openalex.org/W1845972764","https://openalex.org/W1968333723","https://openalex.org/W1998176101","https://openalex.org/W2009551863","https://openalex.org/W2056354534","https://openalex.org/W2099618002","https://openalex.org/W2108114251","https://openalex.org/W2108677974","https://openalex.org/W2111764152","https://openalex.org/W2118688707","https://openalex.org/W2120889539","https://openalex.org/W2132022084","https://openalex.org/W2132908009","https://openalex.org/W2145339207","https://openalex.org/W2170899200","https://openalex.org/W2182000050","https://openalex.org/W2188721763","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2280163991","https://openalex.org/W2317082628","https://openalex.org/W2417786368","https://openalex.org/W2423557781","https://openalex.org/W2427917354","https://openalex.org/W2440926996","https://openalex.org/W2555811267","https://openalex.org/W2556477470","https://openalex.org/W2596982695","https://openalex.org/W2603088459","https://openalex.org/W2606757878","https://openalex.org/W2617547828","https://openalex.org/W2623491082","https://openalex.org/W2724169821","https://openalex.org/W2736601468","https://openalex.org/W2736629007","https://openalex.org/W2742000570","https://openalex.org/W2746553466","https://openalex.org/W2755611049","https://openalex.org/W2765302304","https://openalex.org/W2768498556","https://openalex.org/W2774354230","https://openalex.org/W2784465508","https://openalex.org/W2786928559","https://openalex.org/W2787236409","https://openalex.org/W2787284768","https://openalex.org/W2787938642","https://openalex.org/W2790759781","https://openalex.org/W2803728337","https://openalex.org/W2804380964","https://openalex.org/W2804791273","https://openalex.org/W2807588596","https://openalex.org/W2823112946","https://openalex.org/W2883433335","https://openalex.org/W2885550588","https://openalex.org/W2895453875","https://openalex.org/W2895961679","https://openalex.org/W2897417898","https://openalex.org/W2898585858","https://openalex.org/W2899077443","https://openalex.org/W2904157920","https://openalex.org/W2908064123","https://openalex.org/W2909335861","https://openalex.org/W2914261249","https://openalex.org/W2914351253","https://openalex.org/W2922388521","https://openalex.org/W2945774545","https://openalex.org/W2945850646","https://openalex.org/W2945887696","https://openalex.org/W2946723315","https://openalex.org/W2947526499","https://openalex.org/W2947766526","https://openalex.org/W2948764111","https://openalex.org/W2949352476","https://openalex.org/W2949475445","https://openalex.org/W2949561945","https://openalex.org/W2949682451","https://openalex.org/W2950794298","https://openalex.org/W2951191893","https://openalex.org/W2951799221","https://openalex.org/W2953100042","https://openalex.org/W2953326529","https://openalex.org/W2953772919","https://openalex.org/W2954076389","https://openalex.org/W2962719460","https://openalex.org/W2962802272","https://openalex.org/W2963049774","https://openalex.org/W2963254349","https://openalex.org/W2963276097","https://openalex.org/W2963321092","https://openalex.org/W2963359646","https://openalex.org/W2963403143","https://openalex.org/W2963438456","https://openalex.org/W2963523627","https://openalex.org/W2963616477","https://openalex.org/W2963797557","https://openalex.org/W2963864421","https://openalex.org/W2963870917","https://openalex.org/W2963871073","https://openalex.org/W2963946945","https://openalex.org/W2964043796","https://openalex.org/W2964053353","https://openalex.org/W2964067469","https://openalex.org/W2964096423","https://openalex.org/W2964118262","https://openalex.org/W2965676832","https://openalex.org/W2968284168","https://openalex.org/W2969456553","https://openalex.org/W2970272688","https://openalex.org/W2970393539","https://openalex.org/W2970948392","https://openalex.org/W2971154966","https://openalex.org/W2971442502","https://openalex.org/W2974778612","https://openalex.org/W2981038142","https://openalex.org/W2982238716","https://openalex.org/W2995298643","https://openalex.org/W2996246027","https://openalex.org/W2996549507","https://openalex.org/W2997289589","https://openalex.org/W3005980215","https://openalex.org/W3008543765","https://openalex.org/W3009331570","https://openalex.org/W3010970209","https://openalex.org/W3011042361","https://openalex.org/W3011584947","https://openalex.org/W3034404200","https://openalex.org/W3034769194","https://openalex.org/W3034871777","https://openalex.org/W3034956653","https://openalex.org/W3034962946","https://openalex.org/W3034973310","https://openalex.org/W3035542676","https://openalex.org/W3035717769","https://openalex.org/W3035880215","https://openalex.org/W3036329728","https://openalex.org/W3037940279","https://openalex.org/W3039222472","https://openalex.org/W3046330465","https://openalex.org/W3046395471","https://openalex.org/W3088614181","https://openalex.org/W3090570651","https://openalex.org/W3092804041","https://openalex.org/W3092872653","https://openalex.org/W3093426589","https://openalex.org/W3094490555","https://openalex.org/W3094542670","https://openalex.org/W3096954237","https://openalex.org/W3100789280","https://openalex.org/W3101710896","https://openalex.org/W3103559770","https://openalex.org/W3103732528","https://openalex.org/W3115293622","https://openalex.org/W3115706066","https://openalex.org/W3124229194","https://openalex.org/W3129322645","https://openalex.org/W3132674603","https://openalex.org/W3133280296","https://openalex.org/W3133438440","https://openalex.org/W3152399627","https://openalex.org/W3153676008","https://openalex.org/W3159199672","https://openalex.org/W3162309646","https://openalex.org/W3170511602","https://openalex.org/W3172115140","https://openalex.org/W3199151886","https://openalex.org/W3206005330","https://openalex.org/W3211360571","https://openalex.org/W3211731195","https://openalex.org/W3214770989","https://openalex.org/W4206547457","https://openalex.org/W4220747123","https://openalex.org/W4221146569","https://openalex.org/W4221161274","https://openalex.org/W4226116356","https://openalex.org/W4230563027","https://openalex.org/W4234228486","https://openalex.org/W4286979906","https://openalex.org/W4287116604","https://openalex.org/W4287660295","https://openalex.org/W4287725923","https://openalex.org/W4287756978","https://openalex.org/W4287779179","https://openalex.org/W4287867830","https://openalex.org/W4288029271","https://openalex.org/W4288090372","https://openalex.org/W4288091739","https://openalex.org/W4288289109","https://openalex.org/W4288594419","https://openalex.org/W4288614963","https://openalex.org/W4289107544","https://openalex.org/W4289761856","https://openalex.org/W4293469690","https://openalex.org/W4293545785","https://openalex.org/W4293586424","https://openalex.org/W4293845400","https://openalex.org/W4294225490","https://openalex.org/W4295128128","https://openalex.org/W4295598622","https://openalex.org/W4297627396","https://openalex.org/W4297791094","https://openalex.org/W4298876402","https://openalex.org/W4299802797","https://openalex.org/W4300799055","https://openalex.org/W4301369009","https://openalex.org/W4302570325","https://openalex.org/W4312609624","https://openalex.org/W4385245566","https://openalex.org/W4386240791"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,7],"learning":[2,8,88],"(DRL)":[3],"and":[4,27,31,60,80,108,127,160,180,191,212,225,228],"deep":[5,32,226],"multiagent":[6,128],"(MARL)":[9],"have":[10],"achieved":[11],"significant":[12],"success":[13],"across":[14],"a":[15,116,145,189,202,231],"wide":[16,58],"range":[17],"of":[18,44,148,195,204,221],"domains,":[19],"including":[20],"game":[21],"artificial":[22],"intelligence":[23],"(AI),":[24],"autonomous":[25],"vehicles,":[26],"robotics.":[28],"However,":[29],"DRL":[30,200,224],"MARL":[33,227],"agents":[34],"are":[35,46],"widely":[36],"known":[37],"to":[38,139,184,209],"be":[39],"sample":[40],"inefficient":[41],"that":[42,84],"millions":[43],"interactions":[45],"usually":[47],"needed":[48],"even":[49],"for":[50,123,199],"relatively":[51],"simple":[52],"problem":[53,94],"settings,":[54],"thus":[55],"preventing":[56],"the":[57,70,78,90,132,165,218],"application":[59],"deployment":[61],"in":[62,98,223],"real-industry":[63],"scenarios.":[64],"One":[65],"bottleneck":[66],"challenge":[67],"behind":[68],"is":[69],"well-known":[71],"exploration":[72,121,159,175,197,222],"problem,":[73],"i.e.,":[74],"how":[75],"efficiently":[76],"exploring":[77],"environment":[79],"collecting":[81],"informative":[82],"experiences":[83],"could":[85],"benefit":[86],"policy":[87],"toward":[89],"optimal":[91],"ones.":[92],"This":[93],"becomes":[95],"more":[96],"challenging":[97],"complex":[99],"environments":[100],"with":[101,177],"sparse":[102],"rewards,":[103],"noisy":[104],"distractions,":[105],"long":[106],"horizons,":[107],"nonstationary":[109],"co-learners.":[110],"In":[111,182],"this":[112],"article,":[113],"we":[114,143,170,187,215],"conduct":[115],"comprehensive":[117,190],"survey":[118,133,147],"on":[119,201],"existing":[120,149],"methods":[122,176,198],"both":[124],"single-agent":[125],"RL":[126],"RL.":[129],"We":[130],"start":[131],"by":[134,151],"identifying":[135],"several":[136],"key":[137],"challenges":[138],"efficient":[140],"exploration.":[141,163],"Then,":[142],"provide":[144,188],"systematic":[146],"approaches":[150],"classifying":[152],"them":[153],"into":[154],"two":[155,167],"major":[156],"categories:":[157],"uncertainty-oriented":[158],"intrinsic":[161],"motivation-oriented":[162],"Beyond":[164],"above":[166],"main":[168],"branches,":[169],"also":[171],"include":[172],"other":[173],"notable":[174],"different":[178,196],"ideas":[179],"techniques.":[181],"addition":[183],"algorithmic":[185,211],"analysis,":[186],"unified":[192],"empirical":[193,213],"comparison":[194],"set":[203],"commonly":[205],"used":[206],"benchmarks.":[207],"According":[208],"our":[210],"investigation,":[214],"finally":[216],"summarize":[217],"open":[219],"problems":[220],"point":[229],"out":[230],"few":[232],"future":[233],"directions.":[234]},"counts_by_year":[{"year":2026,"cited_by_count":16},{"year":2025,"cited_by_count":74},{"year":2024,"cited_by_count":45},{"year":2023,"cited_by_count":18}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
