{"id":"https://openalex.org/W4382050620","doi":"https://doi.org/10.1109/meco58584.2023.10155066","title":"An overview of reinforcement learning techniques","display_name":"An overview of reinforcement learning techniques","publication_year":2023,"publication_date":"2023-06-06","ids":{"openalex":"https://openalex.org/W4382050620","doi":"https://doi.org/10.1109/meco58584.2023.10155066"},"language":"en","primary_location":{"id":"doi:10.1109/meco58584.2023.10155066","is_oa":false,"landing_page_url":"https://doi.org/10.1109/meco58584.2023.10155066","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 12th Mediterranean Conference on Embedded Computing (MECO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040485614","display_name":"Damjan Pecioski","orcid":null},"institutions":[{"id":"https://openalex.org/I76245029","display_name":"Saints Cyril and Methodius University of Skopje","ror":"https://ror.org/02wk2vx54","country_code":"MK","type":"education","lineage":["https://openalex.org/I76245029"]}],"countries":["MK"],"is_corresponding":true,"raw_author_name":"Damjan Pecioski","raw_affiliation_strings":["Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia","Faculty of Mechanical Engineering-Skopje, Ss. Cyril and Methodius University in Skopje, Skopje, N. Macedonia"],"affiliations":[{"raw_affiliation_string":"Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia","institution_ids":["https://openalex.org/I76245029"]},{"raw_affiliation_string":"Faculty of Mechanical Engineering-Skopje, Ss. Cyril and Methodius University in Skopje, Skopje, N. Macedonia","institution_ids":["https://openalex.org/I76245029"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025391042","display_name":"Viktor Gavriloski","orcid":"https://orcid.org/0000-0002-0396-6500"},"institutions":[{"id":"https://openalex.org/I76245029","display_name":"Saints Cyril and Methodius University of Skopje","ror":"https://ror.org/02wk2vx54","country_code":"MK","type":"education","lineage":["https://openalex.org/I76245029"]}],"countries":["MK"],"is_corresponding":false,"raw_author_name":"Viktor Gavriloski","raw_affiliation_strings":["Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia","Faculty of Mechanical Engineering-Skopje, Ss. Cyril and Methodius University in Skopje, Skopje, N. Macedonia"],"affiliations":[{"raw_affiliation_string":"Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia","institution_ids":["https://openalex.org/I76245029"]},{"raw_affiliation_string":"Faculty of Mechanical Engineering-Skopje, Ss. Cyril and Methodius University in Skopje, Skopje, N. Macedonia","institution_ids":["https://openalex.org/I76245029"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035308663","display_name":"Simona Domazetovska","orcid":"https://orcid.org/0000-0001-8826-119X"},"institutions":[{"id":"https://openalex.org/I76245029","display_name":"Saints Cyril and Methodius University of Skopje","ror":"https://ror.org/02wk2vx54","country_code":"MK","type":"education","lineage":["https://openalex.org/I76245029"]}],"countries":["MK"],"is_corresponding":false,"raw_author_name":"Simona Domazetovska","raw_affiliation_strings":["Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia","Faculty of Mechanical Engineering-Skopje, Ss. Cyril and Methodius University in Skopje, Skopje, N. Macedonia"],"affiliations":[{"raw_affiliation_string":"Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia","institution_ids":["https://openalex.org/I76245029"]},{"raw_affiliation_string":"Faculty of Mechanical Engineering-Skopje, Ss. Cyril and Methodius University in Skopje, Skopje, N. Macedonia","institution_ids":["https://openalex.org/I76245029"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035286845","display_name":"Anastasija Ignjatovska","orcid":"https://orcid.org/0009-0000-1452-6597"},"institutions":[{"id":"https://openalex.org/I76245029","display_name":"Saints Cyril and Methodius University of Skopje","ror":"https://ror.org/02wk2vx54","country_code":"MK","type":"education","lineage":["https://openalex.org/I76245029"]}],"countries":["MK"],"is_corresponding":false,"raw_author_name":"Anastasija Ignjatovska","raw_affiliation_strings":["Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia","Faculty of Mechanical Engineering-Skopje, Ss. Cyril and Methodius University in Skopje, Skopje, N. Macedonia"],"affiliations":[{"raw_affiliation_string":"Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia","institution_ids":["https://openalex.org/I76245029"]},{"raw_affiliation_string":"Faculty of Mechanical Engineering-Skopje, Ss. Cyril and Methodius University in Skopje, Skopje, N. Macedonia","institution_ids":["https://openalex.org/I76245029"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5040485614"],"corresponding_institution_ids":["https://openalex.org/I76245029"],"apc_list":null,"apc_paid":null,"fwci":2.3959,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.90832441,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.978600025177002,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9689000248908997,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9049686789512634},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.81145840883255},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6316115856170654},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5931564569473267},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5788223147392273},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4765917956829071},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4509626030921936},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40672367811203003}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9049686789512634},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.81145840883255},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6316115856170654},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5931564569473267},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5788223147392273},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4765917956829071},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4509626030921936},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40672367811203003},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/meco58584.2023.10155066","is_oa":false,"landing_page_url":"https://doi.org/10.1109/meco58584.2023.10155066","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 12th Mediterranean Conference on Embedded Computing (MECO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W206679605","https://openalex.org/W1542941925","https://openalex.org/W1941445455","https://openalex.org/W2107726111","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2257979135","https://openalex.org/W2321292752","https://openalex.org/W2575705757","https://openalex.org/W2575731723","https://openalex.org/W2603920273","https://openalex.org/W2745300114","https://openalex.org/W2788115019","https://openalex.org/W2904455790","https://openalex.org/W2908261578","https://openalex.org/W2914316962","https://openalex.org/W2963864421","https://openalex.org/W2973184394","https://openalex.org/W2990850608","https://openalex.org/W3010723141","https://openalex.org/W3033770108","https://openalex.org/W3126321819","https://openalex.org/W3174032342","https://openalex.org/W3211145209","https://openalex.org/W4214717370","https://openalex.org/W4221141482","https://openalex.org/W4229038863","https://openalex.org/W4248238162","https://openalex.org/W4288601258","https://openalex.org/W4295150809","https://openalex.org/W6684921986","https://openalex.org/W6744562401","https://openalex.org/W6749032143","https://openalex.org/W6758595637","https://openalex.org/W6810378067"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W20361778","https://openalex.org/W2024136090","https://openalex.org/W2964765435"],"abstract_inverted_index":{"Writing":[0],"control":[1],"code":[2],"for":[3,162],"a":[4,17,29,76,113,137,144,160,169],"system":[5,108],"where":[6,39],"the":[7,40,53,59,92,97,122,133,177,211],"optimal":[8,54],"solution":[9,55],"is":[10,42,56,75,109,150,159,172],"not":[11],"known":[12],"in":[13,37,49],"advance":[14],"can":[15,34,69,140],"be":[16,35,70,129,141,207],"very":[18],"time-consuming":[19],"process.":[20],"The":[21],"Artificial":[22],"Intelligence":[23],"(AI)":[24],"methods":[25],"typically":[26],"involve":[27],"designing":[28,105],"set":[30],"of":[31,124],"rules":[32],"which":[33,63,88],"effective":[36],"situations":[38],"problem":[41,125],"precisely":[43],"defined":[44],"and":[45,66,183,193,198],"well":[46,132,189],"understood.":[47],"As":[48],"real":[50],"world":[51],"problems":[52],"rarely":[57],"known,":[58],"reinforcement":[60],"learning":[61,73,78],"framework":[62],"incorporates":[64],"trial":[65],"error":[67],"attempts":[68],"used.":[71],"Reinforcement":[72],"(RL)":[74],"machine":[77],"technique":[79],"that":[80,126,139],"involves":[81],"training":[82],"an":[83,106],"agent":[84,146,181,185],"to":[85,102,111,128,152],"make":[86,103],"decisions":[87],"are":[89,200],"based":[90],"on":[91,121],"feedback":[93],"it":[94,149],"receives":[95],"from":[96],"environment.":[98],"One":[99],"important":[100],"decision":[101,119],"when":[104,203],"RL":[107,155,182,186],"whether":[110],"use":[112,153],"single":[114,145,180],"or":[115],"multiple":[116,165],"agents.":[117],"This":[118],"depends":[120],"type":[123],"needs":[127],"solved":[130],"as":[131,188,190],"environment":[134],"complexity.":[135],"Having":[136],"goal":[138],"achieved":[142],"by":[143],"(one":[147],"player)":[148],"recommended":[151],"single-agent":[154],"while":[156],"if":[157],"there":[158],"need":[161],"coordination":[163],"between":[164,179],"agents":[166],"(players)":[167],"then":[168],"multi-agent":[170],"approach":[171,205],"recommended.":[173],"In":[174],"this":[175],"article,":[176],"differences":[178],"multi":[184],"techniques,":[187],"their":[191],"advantages":[192],"disadvantages":[194],"have":[195],"been":[196],"presented,":[197],"insights":[199],"provided":[201],"into":[202],"one":[204],"may":[206],"more":[208],"appropriate":[209],"than":[210],"other.":[212]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-22T08:38:42.863108","created_date":"2025-10-10T00:00:00"}
