{"id":"https://openalex.org/W3160923286","doi":"https://doi.org/10.1109/jsait.2021.3078754","title":"On Finite-Time Convergence of Actor-Critic Algorithm","display_name":"On Finite-Time Convergence of Actor-Critic Algorithm","publication_year":2021,"publication_date":"2021-05-19","ids":{"openalex":"https://openalex.org/W3160923286","doi":"https://doi.org/10.1109/jsait.2021.3078754","mag":"3160923286"},"language":"en","primary_location":{"id":"doi:10.1109/jsait.2021.3078754","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jsait.2021.3078754","pdf_url":null,"source":{"id":"https://openalex.org/S4210211895","display_name":"IEEE Journal on Selected Areas in Information Theory","issn_l":"2641-8770","issn":["2641-8770"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal on Selected Areas in Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103213699","display_name":"Shuang Qiu","orcid":"https://orcid.org/0000-0002-9651-1061"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shuang Qiu","raw_affiliation_strings":["University of Michigan, Ann Arbor, MI, USA","Department of Electrical Engineering and Computer Science, University of Michigan, Ann Arbor MI, USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Michigan, Ann Arbor MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727948","display_name":"Zhuoran Yang","orcid":"https://orcid.org/0000-0001-5269-9958"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhuoran Yang","raw_affiliation_strings":["Princeton University, Princeton, NJ, USA","Dept. of Operations Res. & Financial Eng., Princeton Univ. Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]},{"raw_affiliation_string":"Dept. of Operations Res. & Financial Eng., Princeton Univ. Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010419481","display_name":"Jieping Ye","orcid":"https://orcid.org/0000-0001-8662-5818"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jieping Ye","raw_affiliation_strings":["University of Michigan, Ann Arbor, MI, USA","Department of Computational Medicine and Bioinformatics and the Department of Electrical Engineering and Computer Science, University of Michigan, Ann Arbor, MI, USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]},{"raw_affiliation_string":"Department of Computational Medicine and Bioinformatics and the Department of Electrical Engineering and Computer Science, University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101934110","display_name":"Zhaoran Wang","orcid":"https://orcid.org/0000-0002-1824-2580"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhaoran Wang","raw_affiliation_strings":["Northwestern University, Evanston, IL, USA","Department of Industrial Engineering and Management Sciences and the Department of Computer Science, Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"Department of Industrial Engineering and Management Sciences and the Department of Computer Science, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103213699"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":3.942,"has_fulltext":false,"cited_by_count":39,"citation_normalized_percentile":{"value":0.94528795,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"2","issue":"2","first_page":"652","last_page":"664"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/iterated-function","display_name":"Iterated function","score":0.5889742970466614},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.586930513381958},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.575627863407135},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.49298736453056335},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.486714631319046},{"id":"https://openalex.org/keywords/ordinary-differential-equation","display_name":"Ordinary differential equation","score":0.47630035877227783},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4514625370502472},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.43370506167411804},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4260384738445282},{"id":"https://openalex.org/keywords/sublinear-function","display_name":"Sublinear function","score":0.4242526590824127},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.41360944509506226},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.4125828742980957},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3166744112968445},{"id":"https://openalex.org/keywords/differential-equation","display_name":"Differential equation","score":0.2149055004119873}],"concepts":[{"id":"https://openalex.org/C140479938","wikidata":"https://www.wikidata.org/wiki/Q5254619","display_name":"Iterated function","level":2,"score":0.5889742970466614},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.586930513381958},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.575627863407135},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.49298736453056335},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.486714631319046},{"id":"https://openalex.org/C51544822","wikidata":"https://www.wikidata.org/wiki/Q465274","display_name":"Ordinary differential equation","level":3,"score":0.47630035877227783},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4514625370502472},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.43370506167411804},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4260384738445282},{"id":"https://openalex.org/C117160843","wikidata":"https://www.wikidata.org/wiki/Q338652","display_name":"Sublinear function","level":2,"score":0.4242526590824127},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.41360944509506226},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.4125828742980957},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3166744112968445},{"id":"https://openalex.org/C78045399","wikidata":"https://www.wikidata.org/wiki/Q11214","display_name":"Differential equation","level":2,"score":0.2149055004119873},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jsait.2021.3078754","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jsait.2021.3078754","pdf_url":null,"source":{"id":"https://openalex.org/S4210211895","display_name":"IEEE Journal on Selected Areas in Information Theory","issn_l":"2641-8770","issn":["2641-8770"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal on Selected Areas in Information Theory","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8100000023841858,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":94,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W192920577","https://openalex.org/W594357522","https://openalex.org/W1499021337","https://openalex.org/W1501203061","https://openalex.org/W1597303641","https://openalex.org/W1771410628","https://openalex.org/W2071983464","https://openalex.org/W2072931156","https://openalex.org/W2075268401","https://openalex.org/W2079104779","https://openalex.org/W2079135578","https://openalex.org/W2094387729","https://openalex.org/W2100677568","https://openalex.org/W2101915445","https://openalex.org/W2121703796","https://openalex.org/W2121863487","https://openalex.org/W2124659975","https://openalex.org/W2130801532","https://openalex.org/W2136602922","https://openalex.org/W2139418546","https://openalex.org/W2141091023","https://openalex.org/W2155027007","https://openalex.org/W2156737235","https://openalex.org/W2161270100","https://openalex.org/W2165150801","https://openalex.org/W2165905123","https://openalex.org/W2172968643","https://openalex.org/W2173248099","https://openalex.org/W2173945562","https://openalex.org/W2257979135","https://openalex.org/W2395162158","https://openalex.org/W2473364827","https://openalex.org/W2594203335","https://openalex.org/W2614367549","https://openalex.org/W2617290556","https://openalex.org/W2736601468","https://openalex.org/W2752458704","https://openalex.org/W2766447205","https://openalex.org/W2795561155","https://openalex.org/W2798766386","https://openalex.org/W2885549115","https://openalex.org/W2903336542","https://openalex.org/W2912747791","https://openalex.org/W2920961312","https://openalex.org/W2950556355","https://openalex.org/W2950882283","https://openalex.org/W2952164720","https://openalex.org/W2963250930","https://openalex.org/W2963864421","https://openalex.org/W2964123095","https://openalex.org/W2964203948","https://openalex.org/W2966363432","https://openalex.org/W2970999177","https://openalex.org/W2977813751","https://openalex.org/W2981237928","https://openalex.org/W2988607432","https://openalex.org/W2996964934","https://openalex.org/W3038006656","https://openalex.org/W3041129870","https://openalex.org/W3041202696","https://openalex.org/W3046916097","https://openalex.org/W3104164965","https://openalex.org/W3119097918","https://openalex.org/W3126016767","https://openalex.org/W3136903997","https://openalex.org/W3156926772","https://openalex.org/W3160923286","https://openalex.org/W4243772471","https://openalex.org/W4250739957","https://openalex.org/W4288602110","https://openalex.org/W4289287180","https://openalex.org/W4298064558","https://openalex.org/W4302570325","https://openalex.org/W4318718437","https://openalex.org/W6635767209","https://openalex.org/W6638018090","https://openalex.org/W6677984395","https://openalex.org/W6679257226","https://openalex.org/W6680194329","https://openalex.org/W6683195989","https://openalex.org/W6683204974","https://openalex.org/W6684205842","https://openalex.org/W6684488654","https://openalex.org/W6684921986","https://openalex.org/W6711807133","https://openalex.org/W6734234284","https://openalex.org/W6738056049","https://openalex.org/W6743952311","https://openalex.org/W6747035473","https://openalex.org/W6750027077","https://openalex.org/W6751658861","https://openalex.org/W6760005738","https://openalex.org/W6795071623"],"related_works":["https://openalex.org/W2051058708","https://openalex.org/W90906771","https://openalex.org/W2945629716","https://openalex.org/W2473609169","https://openalex.org/W4283775266","https://openalex.org/W2329573185","https://openalex.org/W4298860769","https://openalex.org/W3043533097","https://openalex.org/W2767126220","https://openalex.org/W3007770227"],"abstract_inverted_index":{"Actor-critic":[0],"algorithm":[1,62,107,126,189],"and":[2,48,84,86],"their":[3],"extensions":[4],"have":[5],"made":[6],"great":[7],"achievements":[8],"in":[9,71,91,134,147],"real-world":[10],"decision-making":[11],"problems.":[12],"In":[13,94,114],"contrast":[14],"to":[15,64,151,156,161,178],"its":[16],"empirical":[17],"success,":[18],"the":[19,23,32,42,46,55,60,72,77,80,87,109,115,124,128,141,162,170,180],"theoretical":[20,121],"understanding":[21],"of":[22,45,59,74,79,123,143,172],"actor-critic":[24,61,106,188],"seems":[25,177],"unsatisfactory.":[26],"Most":[27],"existing":[28],"results":[29],"only":[30],"show":[31,139],"asymptotic":[33],"convergence,":[34],"which":[35],"is":[36],"developed":[37],"mainly":[38],"based":[39],"on":[40],"approximating":[41],"dynamic":[43],"system":[44],"actor":[47,83,144],"critic":[49,116],"using":[50],"ordinary":[51],"differential":[52],"equations.":[53],"However,":[54],"finite-time":[56,100,182],"convergence":[57,101,183],"analysis":[58,102,122,184],"remains":[63],"be":[65],"explored.":[66],"The":[67],"main":[68],"challenges":[69],"lie":[70],"nonconvexity":[73],"parameterized":[75],"policies,":[76],"coupling":[78],"updates":[81],"for":[82,103,127,185],"critic,":[85],"data":[88,133],"sampling":[89],"dependency":[90],"online":[92,105,135,187],"settings.":[93,136],"this":[95],"paper,":[96],"we":[97,118,138],"provide":[98,179],"a":[99,120,148,152],"an":[104,186],"under":[108],"infinite-horizon":[110],"average":[111,129],"reward":[112,130],"setting.":[113],"step,":[117],"give":[119],"TD(0)":[125],"with":[131,190],"dependent":[132],"Besides,":[137],"that":[140],"sequence":[142],"iterates":[145],"converges":[146],"sublinear":[149],"rate":[150],"stationary":[153],"point":[154],"up":[155],"some":[157],"irremovable":[158],"bias":[159],"due":[160],"value":[163],"function":[164],"approximation":[165],"by":[166],"linear":[167],"functions.":[168],"To":[169],"best":[171],"our":[173,175],"knowledge,":[174],"work":[176],"first":[181],"TD":[191],"learning.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-01T17:29:45.350535","created_date":"2025-10-10T00:00:00"}
