{"id":"https://openalex.org/W4386869777","doi":"https://doi.org/10.1109/tai.2023.3316637","title":"Incremental Reinforcement Learning via Performance Evaluation and Policy Perturbation","display_name":"Incremental Reinforcement Learning via Performance Evaluation and Policy Perturbation","publication_year":2023,"publication_date":"2023-09-19","ids":{"openalex":"https://openalex.org/W4386869777","doi":"https://doi.org/10.1109/tai.2023.3316637"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2023.3316637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2023.3316637","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036760439","display_name":"Guizhou Deng","orcid":"https://orcid.org/0000-0001-6589-4402"},"institutions":[{"id":"https://openalex.org/I1297991670","display_name":"Southwest University of Science and Technology","ror":"https://ror.org/04d996474","country_code":"CN","type":"education","lineage":["https://openalex.org/I1297991670"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guizhou Deng","raw_affiliation_strings":["Mechanical Engineering, School of Manufacturing Science and Engineering, Southwest University of Science and Technology, Mianyang, China"],"raw_orcid":"https://orcid.org/0000-0001-6589-4402","affiliations":[{"raw_affiliation_string":"Mechanical Engineering, School of Manufacturing Science and Engineering, Southwest University of Science and Technology, Mianyang, China","institution_ids":["https://openalex.org/I1297991670"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025551679","display_name":"Huiqiao Fu","orcid":"https://orcid.org/0000-0001-9403-2449"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huiqiao Fu","raw_affiliation_strings":["Department of Control and Systems Engineering, School of Management and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0001-9403-2449","affiliations":[{"raw_affiliation_string":"Department of Control and Systems Engineering, School of Management and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114860490","display_name":"Xinpeng Wang","orcid":"https://orcid.org/0000-0002-3031-0056"},"institutions":[{"id":"https://openalex.org/I1297991670","display_name":"Southwest University of Science and Technology","ror":"https://ror.org/04d996474","country_code":"CN","type":"education","lineage":["https://openalex.org/I1297991670"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinpeng Wang","raw_affiliation_strings":["Mechanical Engineering, School of Manufacturing Science and Engineering, Southwest University of Science and Technology, Mianyang, China"],"raw_orcid":"https://orcid.org/0000-0002-3031-0056","affiliations":[{"raw_affiliation_string":"Mechanical Engineering, School of Manufacturing Science and Engineering, Southwest University of Science and Technology, Mianyang, China","institution_ids":["https://openalex.org/I1297991670"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047792053","display_name":"Canghai Liu","orcid":"https://orcid.org/0009-0004-4890-1825"},"institutions":[{"id":"https://openalex.org/I1297991670","display_name":"Southwest University of Science and Technology","ror":"https://ror.org/04d996474","country_code":"CN","type":"education","lineage":["https://openalex.org/I1297991670"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Canghai Liu","raw_affiliation_strings":["Mechanical Engineering, School of Manufacturing Science and Engineering, Southwest University of Science and Technology, Mianyang, China"],"raw_orcid":"https://orcid.org/0009-0004-4890-1825","affiliations":[{"raw_affiliation_string":"Mechanical Engineering, School of Manufacturing Science and Engineering, Southwest University of Science and Technology, Mianyang, China","institution_ids":["https://openalex.org/I1297991670"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031185739","display_name":"Kaiqiang Tang","orcid":"https://orcid.org/0000-0002-7456-0962"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiqiang Tang","raw_affiliation_strings":["Department of Control and Systems Engineering, School of Management and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-7456-0962","affiliations":[{"raw_affiliation_string":"Department of Control and Systems Engineering, School of Management and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100697167","display_name":"Chunlin Chen","orcid":"https://orcid.org/0000-0003-3929-4707"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunlin Chen","raw_affiliation_strings":["Department of Control and Systems Engineering, School of Management and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-3929-4707","affiliations":[{"raw_affiliation_string":"Department of Control and Systems Engineering, School of Management and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3263,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65405874,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"5","issue":"5","first_page":"2253","last_page":"2263"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8867666721343994},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.8034075498580933},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.7176370620727539},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.629426121711731},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5292738676071167},{"id":"https://openalex.org/keywords/perturbation","display_name":"Perturbation (astronomy)","score":0.4836050570011139},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3879459500312805},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36238616704940796},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1147051751613617},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.09821876883506775},{"id":"https://openalex.org/keywords/management","display_name":"Management","score":0.08813181519508362},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.0819922685623169}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8867666721343994},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.8034075498580933},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.7176370620727539},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.629426121711731},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5292738676071167},{"id":"https://openalex.org/C177918212","wikidata":"https://www.wikidata.org/wiki/Q803623","display_name":"Perturbation (astronomy)","level":2,"score":0.4836050570011139},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3879459500312805},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36238616704940796},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1147051751613617},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.09821876883506775},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.08813181519508362},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0819922685623169},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2023.3316637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2023.3316637","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7996831764","display_name":null,"funder_award_id":"62073160","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W41554520","https://openalex.org/W2032277247","https://openalex.org/W2139047213","https://openalex.org/W2158782408","https://openalex.org/W2165698076","https://openalex.org/W2259258048","https://openalex.org/W2623491082","https://openalex.org/W2736601468","https://openalex.org/W2790277807","https://openalex.org/W2791387407","https://openalex.org/W2819774736","https://openalex.org/W2908261578","https://openalex.org/W2911087563","https://openalex.org/W2912681837","https://openalex.org/W2917322258","https://openalex.org/W2963428623","https://openalex.org/W2965407115","https://openalex.org/W2966477753","https://openalex.org/W2973229164","https://openalex.org/W3010768390","https://openalex.org/W3027936473","https://openalex.org/W3041133507","https://openalex.org/W3046093665","https://openalex.org/W3091795298","https://openalex.org/W3093655779","https://openalex.org/W3100789280","https://openalex.org/W3101214305","https://openalex.org/W3130046404","https://openalex.org/W3130272317","https://openalex.org/W3132175036","https://openalex.org/W3148740559","https://openalex.org/W3154227779","https://openalex.org/W3163842339","https://openalex.org/W3191163823","https://openalex.org/W3204245305","https://openalex.org/W3213496246","https://openalex.org/W4225108957","https://openalex.org/W4237591687","https://openalex.org/W4317038451","https://openalex.org/W6630907848","https://openalex.org/W6638018090","https://openalex.org/W6675999342","https://openalex.org/W6680235470","https://openalex.org/W6683204974","https://openalex.org/W6704571135","https://openalex.org/W6717367658","https://openalex.org/W6739193204","https://openalex.org/W6741002519","https://openalex.org/W6751617248","https://openalex.org/W6752601656"],"related_works":["https://openalex.org/W2357124094","https://openalex.org/W2387399993","https://openalex.org/W2389739210","https://openalex.org/W2348924972","https://openalex.org/W2365736347","https://openalex.org/W2047454415","https://openalex.org/W2070040999","https://openalex.org/W2387293848","https://openalex.org/W2250140200","https://openalex.org/W351285018"],"abstract_inverted_index":{"Rapid":[0],"adaptation":[1],"to":[2,44,82,95,113,117,134,138,165],"the":[3,6,46,78,88,97,101,108,114,119,126,136,141,145,150,155,166],"environment":[4,90],"is":[5,110,132],"long-term":[7],"task":[8],"of":[9,48,144,154],"reinforcement":[10,13,49],"learning.":[11],"However,":[12],"learning":[14,50],"faces":[15],"great":[16],"challenges":[17],"in":[18,52,73,87,100,105,125,163],"dynamic":[19,53],"environments,":[20],"especially":[21],"with":[22,55],"continuous":[23,56,160],"state-action":[24,57],"spaces.":[25],"In":[26],"this":[27],"paper,":[28],"we":[29,76],"propose":[30],"a":[31,84],"systematic":[32],"Incremental":[33],"Reinforcement":[34],"Learning":[35],"method":[36,158],"via":[37],"Performance":[38],"Evaluation":[39],"and":[40,69,91,123,152],"Policy":[41],"Perturbation":[42],"(IRL-PEPP)":[43],"improve":[45],"adaptability":[47],"algorithms":[51],"environments":[54],"spaces,":[58],"which":[59],"mainly":[60],"includes":[61],"three":[62],"parts,":[63],"i.e.,":[64],"performance":[65,74],"evaluation,":[66,75],"policy":[67,81,98,106,109,115],"perturbation":[68],"importance":[70,130],"weighting.":[71],"Firstly,":[72],"apply":[77],"learned":[79],"optimal":[80],"sample":[83],"few":[85],"episodes":[86],"original":[89],"use":[92],"these":[93],"samples":[94],"evaluate":[96],"applicability":[99,116],"new":[102,127],"environment.":[103,128],"Then,":[104],"perturbation,":[107],"perturbed":[111],"according":[112],"balance":[118],"trade-off":[120],"between":[121],"exploration":[122],"exploitation":[124],"Finally,":[129],"weighting":[131],"applied":[133],"weight":[135],"information":[137],"speed":[139],"up":[140],"adjustment":[142],"process":[143],"policy.":[146],"Experimental":[147],"results":[148],"demonstrate":[149],"feasibility":[151],"efficiency":[153],"proposed":[156],"IRL-PEPP":[157],"for":[159],"control":[161],"tasks":[162],"comparison":[164],"existing":[167],"state-of-the-art":[168],"methods.":[169]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
