{"id":"https://openalex.org/W4403059193","doi":"https://doi.org/10.1109/access.2024.3472473","title":"Comparative Analysis of A3C and PPO Algorithms in Reinforcement Learning: A Survey on General Environments","display_name":"Comparative Analysis of A3C and PPO Algorithms in Reinforcement Learning: A Survey on General Environments","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4403059193","doi":"https://doi.org/10.1109/access.2024.3472473"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3472473","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3472473","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2024.3472473","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058815085","display_name":"Alberto del R\u00edo","orcid":"https://orcid.org/0000-0002-6832-4381"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Alberto del Rio","raw_affiliation_strings":["Signals, Systems and Radiocommunications Department, Escuela T&#x00E9;cnica Superior de Ingenieros de Telecomunicaci&#x00F3;n (ETSIT), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Signals, Systems and Radiocommunications Department, Escuela T&#x00E9;cnica Superior de Ingenieros de Telecomunicaci&#x00F3;n (ETSIT), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015883220","display_name":"David Jim\u00e9nez","orcid":"https://orcid.org/0000-0002-7382-4276"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"David Jimenez","raw_affiliation_strings":["Physical Electronics, Electrical Engineering and Applied Physics Department, Escuela T&#x00E9;cnica Superior de Ingenieros de Telecomunicaci&#x00F3;n (ETSIT), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Physical Electronics, Electrical Engineering and Applied Physics Department, Escuela T&#x00E9;cnica Superior de Ingenieros de Telecomunicaci&#x00F3;n (ETSIT), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022183159","display_name":"Javier Serrano","orcid":"https://orcid.org/0000-0003-2111-187X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Javier Serrano","raw_affiliation_strings":["Informatic Systems Department, Escuela T&#x00E9;cnica Superior de Ingenier&#x00ED;a de Sistemas Inform&#x00E1;ticos (ETSISI), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Informatic Systems Department, Escuela T&#x00E9;cnica Superior de Ingenier&#x00ED;a de Sistemas Inform&#x00E1;ticos (ETSISI), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5058815085"],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":16.5103,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.99329868,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"12","issue":null,"first_page":"146795","last_page":"146806"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7555999755859375,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7555999755859375,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10551","display_name":"Scheduling and Optimization Algorithms","score":0.70169997215271,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7592960000038147},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7415851950645447},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.5058814883232117},{"id":"https://openalex.org/keywords/algorithm-design","display_name":"Algorithm design","score":0.44404345750808716},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4295971393585205},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3734084963798523}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7592960000038147},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7415851950645447},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.5058814883232117},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.44404345750808716},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4295971393585205},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3734084963798523}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/access.2024.3472473","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3472473","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:41fb2d9528384ef8bd7f7734010a711e","is_oa":true,"landing_page_url":"https://doaj.org/article/41fb2d9528384ef8bd7f7734010a711e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 146795-146806 (2024)","raw_type":"article"},{"id":"pmh:oai:zenodo.org:13959694","is_oa":true,"landing_page_url":"https://doi.org/10.1109/ACCESS.2024.3472473","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, 12, 146795 - 146806, (2024-10-02)","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3472473","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3472473","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5293818021","display_name":null,"funder_award_id":"101092696","funder_id":"https://openalex.org/F4320334322","funder_display_name":"HORIZON EUROPE Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320334322","display_name":"HORIZON EUROPE Framework Programme","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":97,"referenced_works":["https://openalex.org/W1771410628","https://openalex.org/W2042708996","https://openalex.org/W2091565802","https://openalex.org/W2096018174","https://openalex.org/W2107726111","https://openalex.org/W2110292307","https://openalex.org/W2126565096","https://openalex.org/W2150468603","https://openalex.org/W2158782408","https://openalex.org/W2204090322","https://openalex.org/W2522489477","https://openalex.org/W2569028745","https://openalex.org/W2620387295","https://openalex.org/W2638556271","https://openalex.org/W2666874989","https://openalex.org/W2736601468","https://openalex.org/W2738318237","https://openalex.org/W2746553466","https://openalex.org/W2753176368","https://openalex.org/W2775408020","https://openalex.org/W2787933113","https://openalex.org/W2877093712","https://openalex.org/W2897007254","https://openalex.org/W2897475915","https://openalex.org/W2897661175","https://openalex.org/W2922485250","https://openalex.org/W2923653485","https://openalex.org/W2959334635","https://openalex.org/W2963095800","https://openalex.org/W2963317745","https://openalex.org/W2963428623","https://openalex.org/W2963842088","https://openalex.org/W2973379954","https://openalex.org/W2981402159","https://openalex.org/W2982316857","https://openalex.org/W2989779259","https://openalex.org/W2990123902","https://openalex.org/W3006486026","https://openalex.org/W3021855334","https://openalex.org/W3027406032","https://openalex.org/W3034815680","https://openalex.org/W3045059767","https://openalex.org/W3088310808","https://openalex.org/W3089594962","https://openalex.org/W3090903721","https://openalex.org/W3091037773","https://openalex.org/W3092277419","https://openalex.org/W3093528669","https://openalex.org/W3108144771","https://openalex.org/W3113139432","https://openalex.org/W3113470404","https://openalex.org/W3120778962","https://openalex.org/W3121342653","https://openalex.org/W3124201714","https://openalex.org/W3149125616","https://openalex.org/W3157937972","https://openalex.org/W3162079807","https://openalex.org/W3175558129","https://openalex.org/W3185904346","https://openalex.org/W3188721324","https://openalex.org/W3200466256","https://openalex.org/W3210967257","https://openalex.org/W3216656735","https://openalex.org/W4210271893","https://openalex.org/W4210638461","https://openalex.org/W4214647972","https://openalex.org/W4220747123","https://openalex.org/W4285136202","https://openalex.org/W4295185264","https://openalex.org/W4296079718","https://openalex.org/W4298857966","https://openalex.org/W4306362588","https://openalex.org/W4311486598","https://openalex.org/W4362722548","https://openalex.org/W4367727829","https://openalex.org/W4382053099","https://openalex.org/W4383112908","https://openalex.org/W4385830952","https://openalex.org/W4386159259","https://openalex.org/W4389332621","https://openalex.org/W4389722555","https://openalex.org/W4390821388","https://openalex.org/W4392524845","https://openalex.org/W4392583720","https://openalex.org/W4393181759","https://openalex.org/W6637770816","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6692846177","https://openalex.org/W6729972426","https://openalex.org/W6730111887","https://openalex.org/W6741002519","https://openalex.org/W6767858076","https://openalex.org/W6780559895","https://openalex.org/W6795803320","https://openalex.org/W6810738896","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3087814763","https://openalex.org/W4400868993","https://openalex.org/W2361647908","https://openalex.org/W2937181779","https://openalex.org/W2537866915","https://openalex.org/W2089415692"],"abstract_inverted_index":{"This":[0],"research":[1],"article":[2],"presents":[3],"a":[4,54,121],"comparison":[5],"between":[6,151],"two":[7,27],"mainstream":[8],"Deep":[9],"Reinforcement":[10],"Learning":[11],"(DRL)":[12],"algorithms,":[13],"Asynchronous":[14],"Advantage":[15],"Actor-Critic":[16],"(A3C)":[17],"and":[18,31,49,69,80,87,99,154],"Proximal":[19],"Policy":[20],"Optimization":[21],"(PPO),":[22],"in":[23,42,60,94,115,140],"the":[24,78,127,136],"context":[25],"of":[26,57,96,129,135,142],"diverse":[28],"environments:":[29],"CartPole":[30,79],"Lunar":[32,81],"Lander.":[33],"DRL":[34],"algorithms":[35],"are":[36],"widely":[37],"known":[38],"for":[39,65,159,169],"their":[40,58,67,92],"effectiveness":[41,59],"training":[43,109,124,152,172],"agents":[44],"to":[45],"navigate":[46],"complex":[47],"environments":[48,83],"achieve":[50],"optimal":[51],"policies.":[52],"Nevertheless,":[53],"methodical":[55],"assessment":[56],"various":[61],"settings":[62],"is":[63,138,157,166],"crucial":[64],"comprehending":[66],"advantages":[68],"disadvantages.":[70],"In":[71],"this":[72],"study,":[73],"we":[74],"conduct":[75],"experiments":[76],"on":[77,146],"Lander":[82],"using":[84],"both":[85],"A3C":[86,105,156],"PPO":[88,119,165],"algorithms.":[89],"We":[90],"compare":[91],"performance":[93],"terms":[95,141],"convergence":[97],"speed":[98],"stability.":[100,155,173],"Our":[101],"results":[102],"indicate":[103],"that":[104],"typically":[106],"achieves":[107],"quicker":[108],"times,":[110],"but":[111],"exhibits":[112],"greater":[113],"instability":[114],"reward":[116],"values.":[117],"Conversely,":[118],"demonstrates":[120],"more":[122],"stable":[123],"process":[125],"at":[126],"expense":[128],"longer":[130],"execution":[131],"times.":[132],"An":[133],"evaluation":[134],"environment":[137],"needed":[139],"algorithm":[143],"selection,":[144],"based":[145],"specific":[147],"application":[148],"needs,":[149],"balancing":[150],"time":[153],"ideal":[158],"applications":[160],"requiring":[161],"rapid":[162],"training,":[163],"while":[164],"better":[167],"suited":[168],"those":[170],"prioritizing":[171]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":41},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
