{"id":"https://openalex.org/W3209247505","doi":"https://doi.org/10.1109/ccece53047.2021.9569056","title":"Reinforcement Learning Algorithms: An Overview and Classification","display_name":"Reinforcement Learning Algorithms: An Overview and Classification","publication_year":2021,"publication_date":"2021-09-12","ids":{"openalex":"https://openalex.org/W3209247505","doi":"https://doi.org/10.1109/ccece53047.2021.9569056","mag":"3209247505"},"language":"en","primary_location":{"id":"doi:10.1109/ccece53047.2021.9569056","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ccece53047.2021.9569056","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Canadian Conference on Electrical and Computer Engineering (CCECE)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2209.14940","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066059037","display_name":"Fadi AlMahamid","orcid":"https://orcid.org/0000-0002-6907-7626"},"institutions":[{"id":"https://openalex.org/I125749732","display_name":"Western University","ror":"https://ror.org/02grkyz14","country_code":"CA","type":"education","lineage":["https://openalex.org/I125749732"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Fadi AlMahamid","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Western University, London, Ontario, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Western University, London, Ontario, Canada","institution_ids":["https://openalex.org/I125749732"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012962681","display_name":"Katarina Grolinger","orcid":"https://orcid.org/0000-0003-0062-8212"},"institutions":[{"id":"https://openalex.org/I125749732","display_name":"Western University","ror":"https://ror.org/02grkyz14","country_code":"CA","type":"education","lineage":["https://openalex.org/I125749732"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Katarina Grolinger","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Western University, London, Ontario, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Western University, London, Ontario, Canada","institution_ids":["https://openalex.org/I125749732"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5066059037"],"corresponding_institution_ids":["https://openalex.org/I125749732"],"apc_list":null,"apc_paid":null,"fwci":6.5758,"has_fulltext":false,"cited_by_count":101,"citation_normalized_percentile":{"value":0.97234069,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9639999866485596,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9593999981880188,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8560555577278137},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.759720504283905},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6974984407424927},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6289726495742798},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5594789981842041},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5050764679908752},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.46188777685165405},{"id":"https://openalex.org/keywords/learning-classifier-system","display_name":"Learning classifier system","score":0.44899481534957886},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot learning","score":0.4317921996116638},{"id":"https://openalex.org/keywords/drone","display_name":"Drone","score":0.4311334192752838},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3690319061279297},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.2522982358932495},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.15790191292762756}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8560555577278137},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.759720504283905},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6974984407424927},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6289726495742798},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5594789981842041},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5050764679908752},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.46188777685165405},{"id":"https://openalex.org/C199190896","wikidata":"https://www.wikidata.org/wiki/Q3509276","display_name":"Learning classifier system","level":3,"score":0.44899481534957886},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.4317921996116638},{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.4311334192752838},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3690319061279297},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2522982358932495},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.15790191292762756},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/ccece53047.2021.9569056","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ccece53047.2021.9569056","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Canadian Conference on Electrical and Computer Engineering (CCECE)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2209.14940","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.14940","pdf_url":"https://arxiv.org/pdf/2209.14940","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:ir.lib.uwo.ca:electricalpub-1559","is_oa":false,"landing_page_url":"https://ir.lib.uwo.ca/electricalpub/559","pdf_url":null,"source":{"id":"https://openalex.org/S4306400648","display_name":"Scholarship@Western (Western University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I125749732","host_organization_name":"Western University","host_organization_lineage":["https://openalex.org/I125749732"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Electrical and Computer Engineering Publications","raw_type":"conference"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2209.14940","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.14940","pdf_url":"https://arxiv.org/pdf/2209.14940","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W41554520","https://openalex.org/W1771410628","https://openalex.org/W2115211925","https://openalex.org/W2119717200","https://openalex.org/W2121092017","https://openalex.org/W2121863487","https://openalex.org/W2141559645","https://openalex.org/W2155027007","https://openalex.org/W2155894447","https://openalex.org/W2155968351","https://openalex.org/W2156737235","https://openalex.org/W2165150801","https://openalex.org/W2173248099","https://openalex.org/W2173564293","https://openalex.org/W2201581102","https://openalex.org/W2511837229","https://openalex.org/W2556958149","https://openalex.org/W2588283865","https://openalex.org/W2606757878","https://openalex.org/W2736601468","https://openalex.org/W2746553466","https://openalex.org/W2749928749","https://openalex.org/W2781726626","https://openalex.org/W2786036274","https://openalex.org/W2787938642","https://openalex.org/W2798705390","https://openalex.org/W2897540000","https://openalex.org/W2908019077","https://openalex.org/W2951799221","https://openalex.org/W2962938178","https://openalex.org/W2963095800","https://openalex.org/W2963407617","https://openalex.org/W2963477884","https://openalex.org/W2963674921","https://openalex.org/W2963864421","https://openalex.org/W2963923407","https://openalex.org/W2963956018","https://openalex.org/W2964043796","https://openalex.org/W2994967560","https://openalex.org/W3004924597","https://openalex.org/W3084269620","https://openalex.org/W3139377883","https://openalex.org/W3166656850","https://openalex.org/W4230563027","https://openalex.org/W4254755460","https://openalex.org/W4287325387","https://openalex.org/W4297797010","https://openalex.org/W4299802797","https://openalex.org/W4302570325","https://openalex.org/W6638018090","https://openalex.org/W6677939520","https://openalex.org/W6683107984","https://openalex.org/W6683195989","https://openalex.org/W6683204974","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6685444567","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6715102896","https://openalex.org/W6725708968","https://openalex.org/W6730111887","https://openalex.org/W6736680870","https://openalex.org/W6738796088","https://openalex.org/W6741002519","https://openalex.org/W6743802245","https://openalex.org/W6747473740","https://openalex.org/W6748638692","https://openalex.org/W6748839928","https://openalex.org/W6758126674","https://openalex.org/W6783140480","https://openalex.org/W6790969063","https://openalex.org/W6792155000"],"related_works":["https://openalex.org/W1966456942","https://openalex.org/W2930863966","https://openalex.org/W2754028433","https://openalex.org/W3148138296","https://openalex.org/W1882507001","https://openalex.org/W2126211886","https://openalex.org/W2367922714","https://openalex.org/W1986508893","https://openalex.org/W3153786280","https://openalex.org/W3127551068"],"abstract_inverted_index":{"The":[0,144],"desire":[1],"to":[2,12,60,66,131,175],"make":[3],"applications":[4],"and":[5,9,28,45,50,80,89,125,155,158,170,173],"machines":[6],"more":[7],"intelligent":[8],"the":[10,46,56,84,90,99,107,152,168,177],"aspiration":[11],"enable":[13],"their":[14,181],"operation":[15],"without":[16],"human":[17],"interaction":[18],"have":[19,54],"been":[20,37],"driving":[21],"innovations":[22],"in":[23,40,71,97,111,116],"neural":[24],"networks,":[25],"deep":[26],"learning,":[27],"other":[29],"machine":[30],"learning":[31,35,58,102,128],"techniques.":[32],"Although":[33],"reinforcement":[34,52,57,101,127],"has":[36],"primarily":[38],"used":[39],"video":[41,64],"games,":[42],"recent":[43],"advancements":[44],"development":[47],"of":[48,86,146],"diverse":[49],"powerful":[51],"algorithms":[53,129],"enabled":[55],"community":[59],"move":[61],"from":[62],"playing":[63],"games":[65],"solving":[67],"complex":[68],"real-life":[69],"problems":[70],"autonomous":[72],"systems":[73],"such":[74],"as":[75],"self-driving":[76],"cars,":[77],"delivery":[78],"drones,":[79],"automated":[81],"robotics.":[82],"Understanding":[83],"environment":[85,123,133],"an":[87,112],"application":[88],"algorithms'":[91,153],"limitations":[92],"plays":[93],"a":[94,165],"vital":[95],"role":[96],"selecting":[98],"appropriate":[100,178],"algorithm":[103,148,179],"that":[104],"successfully":[105],"solves":[106],"problem":[108],"on":[109,167],"hand":[110],"efficient":[113],"manner.":[114],"Consequently,":[115],"this":[117],"study,":[118],"we":[119,139],"identify":[120,140],"three":[121],"main":[122],"types":[124],"classify":[126],"according":[130],"those":[132],"types.":[134],"Moreover,":[135],"within":[136],"each":[137,147],"category,":[138],"relationships":[141],"between":[142],"algorithms.":[143,161],"overview":[145],"provides":[149,164],"insight":[150],"into":[151],"foundations":[154],"reviews":[156],"similarities":[157],"differences":[159],"among":[160],"This":[162],"study":[163],"perspective":[166],"field":[169],"helps":[171],"practitioners":[172],"researchers":[174],"select":[176],"for":[180],"use":[182],"case.":[183]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":45},{"year":2024,"cited_by_count":37},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":2}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
