{"id":"https://openalex.org/W4376478835","doi":"https://doi.org/10.1109/tsp.2023.3268475","title":"Towards Understanding Asynchronous Advantage Actor-Critic: Convergence and Linear Speedup","display_name":"Towards Understanding Asynchronous Advantage Actor-Critic: Convergence and Linear Speedup","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4376478835","doi":"https://doi.org/10.1109/tsp.2023.3268475"},"language":"en","primary_location":{"id":"doi:10.1109/tsp.2023.3268475","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsp.2023.3268475","pdf_url":null,"source":{"id":"https://openalex.org/S168680287","display_name":"IEEE Transactions on Signal Processing","issn_l":"1053-587X","issn":["1053-587X","1941-0476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001223413","display_name":"Han Shen","orcid":"https://orcid.org/0000-0001-8537-9862"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Han Shen","raw_affiliation_strings":["Department of Electrical, Computer, and Systems Engineering, Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0000-0001-8537-9862","affiliations":[{"raw_affiliation_string":"Department of Electrical, Computer, and Systems Engineering, Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047410441","display_name":"Kaiqing Zhang","orcid":"https://orcid.org/0000-0002-7446-7581"},"institutions":[{"id":"https://openalex.org/I4210143601","display_name":"Decision Systems (United States)","ror":"https://ror.org/0434dpa13","country_code":"US","type":"company","lineage":["https://openalex.org/I4210143601"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kaiqing Zhang","raw_affiliation_strings":["Laboratory for Information &amp; Decision Systems and Computer Science &amp; Artificial Intelligence Laboratory, Massachusetts Institute of Technology, Cambridge, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratory for Information &amp; Decision Systems and Computer Science &amp; Artificial Intelligence Laboratory, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210143601","https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100633783","display_name":"Mingyi Hong","orcid":"https://orcid.org/0000-0003-1263-9365"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingyi Hong","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Minnesota, Minneapolis, MN, USA"],"raw_orcid":"https://orcid.org/0000-0003-1263-9365","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Minnesota, Minneapolis, MN, USA","institution_ids":["https://openalex.org/I130238516"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100783476","display_name":"Tianyi Chen","orcid":"https://orcid.org/0000-0003-3477-1439"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianyi Chen","raw_affiliation_strings":["Department of Electrical, Computer, and Systems Engineering, Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0000-0003-3477-1439","affiliations":[{"raw_affiliation_string":"Department of Electrical, Computer, and Systems Engineering, Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.3843,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.96564087,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"71","issue":null,"first_page":"2579","last_page":"2594"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9692999720573425,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.7947482466697693},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6401818990707397},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6147674322128296},{"id":"https://openalex.org/keywords/notation","display_name":"Notation","score":0.5041755437850952},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47393709421157837},{"id":"https://openalex.org/keywords/weak-convergence","display_name":"Weak convergence","score":0.4291847348213196},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4142567813396454},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.412926584482193},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.39498695731163025},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3533005118370056},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.15221893787384033},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.1473865807056427},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10566109418869019},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07826682925224304}],"concepts":[{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.7947482466697693},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6401818990707397},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6147674322128296},{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.5041755437850952},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47393709421157837},{"id":"https://openalex.org/C57945734","wikidata":"https://www.wikidata.org/wiki/Q7977941","display_name":"Weak convergence","level":3,"score":0.4291847348213196},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4142567813396454},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.412926584482193},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.39498695731163025},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3533005118370056},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.15221893787384033},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.1473865807056427},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10566109418869019},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07826682925224304},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsp.2023.3268475","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsp.2023.3268475","pdf_url":null,"source":{"id":"https://openalex.org/S168680287","display_name":"IEEE Transactions on Signal Processing","issn_l":"1053-587X","issn":["1053-587X","1941-0476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":87,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1603765807","https://openalex.org/W1658008008","https://openalex.org/W1918371733","https://openalex.org/W2047364871","https://openalex.org/W2049469158","https://openalex.org/W2082261506","https://openalex.org/W2094387729","https://openalex.org/W2113501460","https://openalex.org/W2138243089","https://openalex.org/W2144446635","https://openalex.org/W2145339207","https://openalex.org/W2155027007","https://openalex.org/W2156737235","https://openalex.org/W2410733619","https://openalex.org/W2605994693","https://openalex.org/W2617960902","https://openalex.org/W2618059222","https://openalex.org/W2746390786","https://openalex.org/W2788115019","https://openalex.org/W2793035934","https://openalex.org/W2904435756","https://openalex.org/W2912747791","https://openalex.org/W2948432982","https://openalex.org/W2949585412","https://openalex.org/W2950395671","https://openalex.org/W2951923023","https://openalex.org/W2954388484","https://openalex.org/W2963616027","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2971587637","https://openalex.org/W2977813751","https://openalex.org/W2981237928","https://openalex.org/W3005226193","https://openalex.org/W3021175792","https://openalex.org/W3024652448","https://openalex.org/W3035141911","https://openalex.org/W3038006656","https://openalex.org/W3041129870","https://openalex.org/W3041202696","https://openalex.org/W3046626913","https://openalex.org/W3082963611","https://openalex.org/W3089075644","https://openalex.org/W3093528669","https://openalex.org/W3104164965","https://openalex.org/W3109546547","https://openalex.org/W3114616228","https://openalex.org/W3126016767","https://openalex.org/W3160923286","https://openalex.org/W3176417267","https://openalex.org/W3209208698","https://openalex.org/W4206889954","https://openalex.org/W4241811150","https://openalex.org/W4287758406","https://openalex.org/W4297797010","https://openalex.org/W6636881020","https://openalex.org/W6680402377","https://openalex.org/W6683195989","https://openalex.org/W6683204974","https://openalex.org/W6684921986","https://openalex.org/W6692846177","https://openalex.org/W6714879114","https://openalex.org/W6718105623","https://openalex.org/W6735579001","https://openalex.org/W6738250615","https://openalex.org/W6738480291","https://openalex.org/W6748638692","https://openalex.org/W6749032143","https://openalex.org/W6749032995","https://openalex.org/W6757082500","https://openalex.org/W6763002318","https://openalex.org/W6764419582","https://openalex.org/W6764460138","https://openalex.org/W6766497270","https://openalex.org/W6767434776","https://openalex.org/W6767941141","https://openalex.org/W6776367181","https://openalex.org/W6776824048","https://openalex.org/W6777944346","https://openalex.org/W6779101081","https://openalex.org/W6779279235","https://openalex.org/W6780089238","https://openalex.org/W6780576306","https://openalex.org/W6781166419","https://openalex.org/W6787694181","https://openalex.org/W6794209418"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2168758875","https://openalex.org/W2410733619","https://openalex.org/W4246549241","https://openalex.org/W2963483475","https://openalex.org/W1876383975","https://openalex.org/W3027194385"],"abstract_inverted_index":{"Asynchronous":[0],"and":[1,30,59,74,83,100,171,189],"parallel":[2],"implementation":[3],"of":[4,14,18,46,63,116,140,148,169],"standard":[5],"reinforcement":[6],"learning":[7],"(RL)":[8],"algorithms":[9,175],"is":[10,33,42,137],"a":[11],"key":[12],"enabler":[13],"the":[15,27,34,44,60,71,88,95,101,138,144,167,178],"tremendous":[16],"success":[17],"modern":[19],"RL.":[20],"Among":[21],"many":[22],"asynchronous":[23,35],"RL":[24],"algorithms,":[25],"arguably":[26],"most":[28],"popular":[29],"effective":[31],"one":[32],"advantage":[36,168],"actor-critic":[37],"(A3C)":[38],"algorithm.":[39],"Although":[40],"A3C":[41,72,93,112,158],"becoming":[43],"workhorse":[45],"RL,":[47],"its":[48,56,76],"theoretical":[49,198],"properties":[50],"are":[51],"still":[52],"not":[53],"well-understood,":[54],"including":[55],"non-asymptotic":[57,77],"analysis":[58],"performance":[61],"gain":[62],"parallelism":[64,170],"(a.k.a.":[65],"linear":[66],"speedup).":[67],"This":[68],"paper":[69],"revisits":[70],"algorithm":[73],"establishes":[75],"convergence":[78,90,103],"guarantees.":[79],"Under":[80,109],"both":[81],"i.i.d.":[82,110],"Markovian":[84],"sampling,":[85,111],"we":[86],"establish":[87],"local":[89],"guarantee":[91,104],"for":[92,155,177],"in":[94,105,173],"general":[96],"policy":[97,107],"approximation":[98],"case":[99],"global":[102],"softmax":[106],"parameterization.":[108],"obtains":[113],"sample":[114,146],"complexity":[115,147],"<inline-formula":[117,127,133,149],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[118,128,134,150,161],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[119,129,135,151],"notation=\"LaTeX\">$\\mathcal":[120,152],"{O}(\\epsilon":[121,153],"^{-2.5}/N)$</tex-math></inline-formula>":[122],"per":[123],"worker":[124],"to":[125,143,195],"achieve":[126],"notation=\"LaTeX\">$\\epsilon$</tex-math></inline-formula>":[130],"accuracy,":[131],"where":[132],"notation=\"LaTeX\">$N$</tex-math></inline-formula>":[136],"number":[139],"workers.":[141],"Compared":[142],"best-known":[145],"^{-2.5})$</tex-math></inline-formula>":[154],"two-timescale":[156],"AC,":[157],"achieves":[159],"<italic":[160],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">linear":[162],"speedup</i>":[163],",":[164],"which":[165],"justifies":[166],"asynchrony":[172],"AC":[174],"theoretically":[176],"first":[179],"time.":[180],"Numerical":[181],"tests":[182],"on":[183],"synthetic":[184],"environment,":[185],"OpenAI":[186],"Gym":[187],"environments":[188],"Atari":[190],"games":[191],"have":[192],"been":[193],"provided":[194],"verify":[196],"our":[197],"analysis.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
