{"id":"https://openalex.org/W3119982186","doi":"https://doi.org/10.1109/cdc42340.2020.9303966","title":"Finite-Sample Analysis of Multi-Agent Policy Evaluation with Kernelized Gradient Temporal Difference","display_name":"Finite-Sample Analysis of Multi-Agent Policy Evaluation with Kernelized Gradient Temporal Difference","publication_year":2020,"publication_date":"2020-12-14","ids":{"openalex":"https://openalex.org/W3119982186","doi":"https://doi.org/10.1109/cdc42340.2020.9303966","mag":"3119982186"},"language":"en","primary_location":{"id":"doi:10.1109/cdc42340.2020.9303966","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc42340.2020.9303966","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 59th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015690883","display_name":"Paulo Heredia","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Paulo Heredia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070733769","display_name":"Shaoshuai Mou","orcid":"https://orcid.org/0000-0002-3698-4238"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shaoshuai Mou","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5015690883"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7036,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.77325082,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"5647","last_page":"5652"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reproducing-kernel-hilbert-space","display_name":"Reproducing kernel Hilbert space","score":0.8435903787612915},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.6675691604614258},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.6540654897689819},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5924948453903198},{"id":"https://openalex.org/keywords/hilbert-space","display_name":"Hilbert space","score":0.5589672923088074},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5380010604858398},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5255647897720337},{"id":"https://openalex.org/keywords/approximation-error","display_name":"Approximation error","score":0.4845501482486725},{"id":"https://openalex.org/keywords/covariance","display_name":"Covariance","score":0.471230149269104},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.46722492575645447},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.4558218717575073},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.45538875460624695},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4404650330543518},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.4063180387020111},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3852734863758087},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31567811965942383},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.2474532127380371},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.18652111291885376},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.17221343517303467},{"id":"https://openalex.org/keywords/pure-mathematics","display_name":"Pure mathematics","score":0.09099933505058289},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.0886152982711792}],"concepts":[{"id":"https://openalex.org/C80884492","wikidata":"https://www.wikidata.org/wiki/Q3345678","display_name":"Reproducing kernel Hilbert space","level":3,"score":0.8435903787612915},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.6675691604614258},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.6540654897689819},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5924948453903198},{"id":"https://openalex.org/C62799726","wikidata":"https://www.wikidata.org/wiki/Q190056","display_name":"Hilbert space","level":2,"score":0.5589672923088074},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5380010604858398},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5255647897720337},{"id":"https://openalex.org/C122383733","wikidata":"https://www.wikidata.org/wiki/Q865920","display_name":"Approximation error","level":2,"score":0.4845501482486725},{"id":"https://openalex.org/C178650346","wikidata":"https://www.wikidata.org/wiki/Q201984","display_name":"Covariance","level":2,"score":0.471230149269104},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.46722492575645447},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.4558218717575073},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.45538875460624695},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4404650330543518},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.4063180387020111},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3852734863758087},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31567811965942383},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2474532127380371},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.18652111291885376},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.17221343517303467},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.09099933505058289},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0886152982711792},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc42340.2020.9303966","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc42340.2020.9303966","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 59th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W109752866","https://openalex.org/W215298514","https://openalex.org/W1540155273","https://openalex.org/W1550946783","https://openalex.org/W2094387729","https://openalex.org/W2107525996","https://openalex.org/W2119112357","https://openalex.org/W2121863487","https://openalex.org/W2122701159","https://openalex.org/W2156737235","https://openalex.org/W2160108098","https://openalex.org/W2165726932","https://openalex.org/W2567931453","https://openalex.org/W2575705757","https://openalex.org/W2578657880","https://openalex.org/W2594873560","https://openalex.org/W2611028343","https://openalex.org/W2763948036","https://openalex.org/W2788115019","https://openalex.org/W2901992463","https://openalex.org/W2903410452","https://openalex.org/W2904435756","https://openalex.org/W2911365170","https://openalex.org/W2913326990","https://openalex.org/W2928539936","https://openalex.org/W2944956041","https://openalex.org/W2947958009","https://openalex.org/W2962771678","https://openalex.org/W2962856794","https://openalex.org/W2963488874","https://openalex.org/W2963747324","https://openalex.org/W2964005211","https://openalex.org/W2964988441","https://openalex.org/W2966363432","https://openalex.org/W2993990522","https://openalex.org/W2994779591","https://openalex.org/W2997604484","https://openalex.org/W3006140144","https://openalex.org/W3010757121","https://openalex.org/W3010790156","https://openalex.org/W3011272463","https://openalex.org/W3027121709","https://openalex.org/W3092621452","https://openalex.org/W3107686539","https://openalex.org/W3110309042","https://openalex.org/W3136903997","https://openalex.org/W4288279122","https://openalex.org/W4288602110","https://openalex.org/W4289743128","https://openalex.org/W4298064558","https://openalex.org/W6608635065","https://openalex.org/W6633018774","https://openalex.org/W6678097026","https://openalex.org/W6678494045","https://openalex.org/W6683195989","https://openalex.org/W6732439103","https://openalex.org/W6748232849","https://openalex.org/W6749032143","https://openalex.org/W6751658861","https://openalex.org/W6751921135","https://openalex.org/W6752877344","https://openalex.org/W6757082500","https://openalex.org/W6757967233","https://openalex.org/W6758937863","https://openalex.org/W6762191990","https://openalex.org/W6766343273","https://openalex.org/W6771232374","https://openalex.org/W6771412591"],"related_works":["https://openalex.org/W2018264451","https://openalex.org/W2591253179","https://openalex.org/W4287671600","https://openalex.org/W4212827636","https://openalex.org/W2141366819","https://openalex.org/W4322716129","https://openalex.org/W3004408979","https://openalex.org/W3210788098","https://openalex.org/W4387349426","https://openalex.org/W3119982186"],"abstract_inverted_index":{"In":[0],"this":[1],"work":[2,31],"we":[3],"will":[4],"provide":[5],"a":[6,10,40,59,100],"finite-sample":[7],"analysis":[8],"of":[9,63,69,77,123],"distributed":[11,93],"gradient":[12],"temporal":[13],"difference":[14],"algorithm":[15,95,111],"for":[16,74],"policy":[17],"evaluation":[18],"with":[19,48,116],"value":[20],"functions":[21],"that":[22,91,105],"lie":[23],"in":[24,118],"Reproducing":[25],"Kernel":[26],"Hilbert":[27],"Spaces":[28],"(RKHS).":[29],"This":[30,88],"focuses":[32],"on":[33],"multi-agent":[34],"systems":[35],"where":[36],"each":[37],"agent":[38],"observes":[39],"private":[41],"reward":[42],"and":[43],"agents":[44],"can":[45,98],"only":[46],"communicate":[47],"nearby":[49],"neighbors":[50],"under":[51,96],"time":[52],"varying":[53],"networks.":[54],"The":[55],"main":[56],"result":[57,89],"is":[58,106,114],"time-evolving":[60],"upper":[61],"bound":[62],"the":[64,70,75,78,84,92,110,119],"second":[65],"order":[66],"error":[67,80,103],"statistics":[68],"algorithm,":[71],"which":[72,113],"accounts":[73],"evolution":[76],"consensus":[79],"as":[81,83],"well":[82],"average":[85],"approximation":[86],"error.":[87],"shows":[90],"learning":[94],"consideration":[97],"achieve":[99],"bounded":[101],"final":[102],"covariance":[104],"inversely":[107],"proportional":[108],"to":[109],"step-size,":[112],"consistent":[115],"results":[117],"more":[120],"general":[121],"field":[122],"stochastic":[124],"approximation.":[125]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}