{"id":"https://openalex.org/W3027406032","doi":"https://doi.org/10.1145/3459991","title":"A Survey of Reinforcement Learning Algorithms for Dynamically Varying Environments","display_name":"A Survey of Reinforcement Learning Algorithms for Dynamically Varying Environments","publication_year":2021,"publication_date":"2021-07-13","ids":{"openalex":"https://openalex.org/W3027406032","doi":"https://doi.org/10.1145/3459991","mag":"3027406032"},"language":"en","primary_location":{"id":"doi:10.1145/3459991","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3459991","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"review","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2005.10619","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040002468","display_name":"Sindhu Padakandla","orcid":"https://orcid.org/0000-0003-3385-294X"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Sindhu Padakandla","raw_affiliation_strings":["Department of Computer Science and Automation, Indian Institute of Science, Bangalore, Karnataka, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bangalore, Karnataka, India","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5040002468"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":17.4708,"has_fulltext":false,"cited_by_count":200,"citation_normalized_percentile":{"value":0.99632627,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"54","issue":"6","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8338495492935181},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7975478768348694},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.6121355295181274},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5957856178283691},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5558425784111023},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.5202171206474304},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4843915104866028},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.46434885263442993},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.43504345417022705},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3252781331539154}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8338495492935181},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7975478768348694},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.6121355295181274},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5957856178283691},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5558425784111023},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.5202171206474304},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4843915104866028},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.46434885263442993},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.43504345417022705},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3252781331539154},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3459991","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3459991","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},{"id":"pmh:oai:eprints.iisc.ac.in:69872","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401429","display_name":"ePrints@IISc (Indian Institute of Science)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59270414","host_organization_name":"Indian Institute of Science Bangalore","host_organization_lineage":["https://openalex.org/I59270414"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"pmh:oai:arXiv.org:2005.10619","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2005.10619","pdf_url":"https://arxiv.org/pdf/2005.10619","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2005.10619","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2005.10619","pdf_url":"https://arxiv.org/pdf/2005.10619","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5600000023841858,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W49673716","https://openalex.org/W178169250","https://openalex.org/W195596278","https://openalex.org/W594357522","https://openalex.org/W1497826971","https://openalex.org/W1506859583","https://openalex.org/W1587845729","https://openalex.org/W1771410628","https://openalex.org/W1845972764","https://openalex.org/W1849095486","https://openalex.org/W1850488217","https://openalex.org/W1977655452","https://openalex.org/W1983470923","https://openalex.org/W1993167666","https://openalex.org/W2001825424","https://openalex.org/W2004094275","https://openalex.org/W2009303086","https://openalex.org/W2046660101","https://openalex.org/W2051903196","https://openalex.org/W2077723394","https://openalex.org/W2099618002","https://openalex.org/W2115524942","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2145339207","https://openalex.org/W2152670157","https://openalex.org/W2155027007","https://openalex.org/W2156371714","https://openalex.org/W2162926979","https://openalex.org/W2165116724","https://openalex.org/W2174227032","https://openalex.org/W2175190893","https://openalex.org/W2210543184","https://openalex.org/W2344944957","https://openalex.org/W2529605558","https://openalex.org/W2582998992","https://openalex.org/W2596785231","https://openalex.org/W2604763608","https://openalex.org/W2725582697","https://openalex.org/W2732645484","https://openalex.org/W2739785516","https://openalex.org/W2749610436","https://openalex.org/W2788388592","https://openalex.org/W2792071139","https://openalex.org/W2807973248","https://openalex.org/W2808432212","https://openalex.org/W2809162153","https://openalex.org/W2911197199","https://openalex.org/W2913668833","https://openalex.org/W2914557154","https://openalex.org/W2914584698","https://openalex.org/W2918932727","https://openalex.org/W2942369252","https://openalex.org/W2942608247","https://openalex.org/W2953199352","https://openalex.org/W2954737642","https://openalex.org/W2963026770","https://openalex.org/W2963094322","https://openalex.org/W2963176272","https://openalex.org/W2963390429","https://openalex.org/W2963887494","https://openalex.org/W2964222840","https://openalex.org/W2964254877","https://openalex.org/W2966818373","https://openalex.org/W2967257869","https://openalex.org/W2972710806","https://openalex.org/W2982306485","https://openalex.org/W2996726407","https://openalex.org/W3003931103","https://openalex.org/W3008102108","https://openalex.org/W3010754899","https://openalex.org/W3011337332","https://openalex.org/W3014312017","https://openalex.org/W3023161348","https://openalex.org/W3080482303","https://openalex.org/W3100201759","https://openalex.org/W3123212791","https://openalex.org/W4211208325","https://openalex.org/W4212774754","https://openalex.org/W4246329541","https://openalex.org/W4292022450"],"related_works":["https://openalex.org/W2165912799","https://openalex.org/W2735662278","https://openalex.org/W2382615723","https://openalex.org/W4311804456","https://openalex.org/W1987484445","https://openalex.org/W2623658258","https://openalex.org/W2143413548","https://openalex.org/W1969219540","https://openalex.org/W2370459448","https://openalex.org/W2105067402"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,121],"(RL)":[2],"algorithms":[3,65,149],"find":[4],"applications":[5,42],"in":[6,22,40,152,154],"inventory":[7],"control,":[8],"recommender":[9],"systems,":[10],"vehicular":[11],"traffic":[12],"management,":[13],"cloud":[14],"computing,":[15],"and":[16,47,161,165],"robotics.":[17],"The":[18,91],"real-world":[19],"complications":[20],"arising":[21],"these":[23,41,148],"domains":[24],"makes":[25],"them":[26],"difficult":[27],"to":[28,45,49,102,107,137,175],"solve":[29],"with":[30,158],"the":[31,67,98,117,132,141],"basic":[32],"assumptions":[33],"underlying":[34,68,142],"classical":[35],"RL":[36,38,60,82,123,133],"algorithms.":[37],"agents":[39,105],"often":[43],"need":[44],"react":[46],"adapt":[48,106],"changing":[50],"operating":[51,109],"conditions.":[52,110],"A":[53,144],"significant":[54],"part":[55],"of":[56,70,81,93,140,147],"research":[57],"on":[58,63],"single-agent":[59],"techniques":[61],"focuses":[62],"developing":[64],"when":[66],"assumption":[69,100],"stationary":[71],"environment":[72,89],"model":[73],"is":[74,101,112,150],"relaxed.":[75],"This":[76,111],"article":[77],"provides":[78],"a":[79,128],"survey":[80],"methods":[83,94],"developed":[84],"for":[85,131,183],"handling":[86],"dynamically":[87],"varying":[88,108],"models.":[90],"goal":[92],"not":[95],"limited":[96],"by":[97,115,122,126],"stationarity":[99],"help":[103],"autonomous":[104],"possible":[113],"either":[114],"minimizing":[116],"rewards":[118],"lost":[119],"during":[120],"agent":[124,134],"or":[125],"finding":[127],"suitable":[129],"policy":[130],"that":[135,172],"leads":[136],"efficient":[138],"operation":[139],"system.":[143],"representative":[145],"collection":[146],"discussed":[151],"detail":[153],"this":[155,184],"work":[156],"along":[157],"their":[159,162],"categorization":[160],"relative":[163],"merits":[164],"demerits.":[166],"Additionally,":[167],"we":[168,179],"also":[169],"review":[170],"works":[171],"are":[173],"tailored":[174],"application":[176],"domains.":[177],"Finally,":[178],"discuss":[180],"future":[181],"enhancements":[182],"field.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":12},{"year":2025,"cited_by_count":58},{"year":2024,"cited_by_count":50},{"year":2023,"cited_by_count":25},{"year":2022,"cited_by_count":34},{"year":2021,"cited_by_count":17},{"year":2020,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
