{"id":"https://openalex.org/W2978307116","doi":"https://doi.org/10.1109/ijcnn.2019.8851866","title":"Model-Free Temporal Difference Learning for Non-Zero-Sum Games","display_name":"Model-Free Temporal Difference Learning for Non-Zero-Sum Games","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W2978307116","doi":"https://doi.org/10.1109/ijcnn.2019.8851866","mag":"2978307116"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2019.8851866","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2019.8851866","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100328722","display_name":"Liming Wang","orcid":"https://orcid.org/0000-0002-3837-2331"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liming Wang","raw_affiliation_strings":["School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053342436","display_name":"Yongliang Yang","orcid":"https://orcid.org/0000-0002-3144-8604"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongliang Yang","raw_affiliation_strings":["School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073089333","display_name":"Da\u2010Wei Ding","orcid":"https://orcid.org/0000-0003-1201-7785"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Ding","raw_affiliation_strings":["School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101512101","display_name":"Yixin Yin","orcid":"https://orcid.org/0000-0001-6056-8878"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yixin Yin","raw_affiliation_strings":["School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067621571","display_name":"Zhishan Guo","orcid":"https://orcid.org/0000-0002-5967-1058"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhishan Guo","raw_affiliation_strings":["Department of Electric and Computer Engineering, University of Central Florida, Orlando, Florida, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electric and Computer Engineering, University of Central Florida, Orlando, Florida, USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038037619","display_name":"Donald C. Wunsch","orcid":"https://orcid.org/0000-0002-9726-9051"},"institutions":[{"id":"https://openalex.org/I20382870","display_name":"Missouri University of Science and Technology","ror":"https://ror.org/00scwqd12","country_code":"US","type":"education","lineage":["https://openalex.org/I20382870"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Donald C. Wunsch","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Missouri University of Science and Technology, Rolla, Missouri, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Missouri University of Science and Technology, Rolla, Missouri, USA","institution_ids":["https://openalex.org/I20382870"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100328722"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1566228,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12277","display_name":"Frequency Control in Power Systems","score":0.9426000118255615,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/algebraic-riccati-equation","display_name":"Algebraic Riccati equation","score":0.6772545576095581},{"id":"https://openalex.org/keywords/algebraic-number","display_name":"Algebraic number","score":0.54714035987854},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.5448179244995117},{"id":"https://openalex.org/keywords/algebraic-equation","display_name":"Algebraic equation","score":0.5358703136444092},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.45993995666503906},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4505876898765564},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.4358672797679901},{"id":"https://openalex.org/keywords/zero-sum-game","display_name":"Zero-sum game","score":0.42559006810188293},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.40186846256256104},{"id":"https://openalex.org/keywords/riccati-equation","display_name":"Riccati equation","score":0.38885053992271423},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.3735727071762085},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.36074256896972656},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3455261290073395},{"id":"https://openalex.org/keywords/differential-equation","display_name":"Differential equation","score":0.16022855043411255},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.13123542070388794},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.09331774711608887}],"concepts":[{"id":"https://openalex.org/C13847129","wikidata":"https://www.wikidata.org/wiki/Q4723989","display_name":"Algebraic Riccati equation","level":4,"score":0.6772545576095581},{"id":"https://openalex.org/C9376300","wikidata":"https://www.wikidata.org/wiki/Q168817","display_name":"Algebraic number","level":2,"score":0.54714035987854},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5448179244995117},{"id":"https://openalex.org/C23917780","wikidata":"https://www.wikidata.org/wiki/Q50698","display_name":"Algebraic equation","level":3,"score":0.5358703136444092},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.45993995666503906},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4505876898765564},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.4358672797679901},{"id":"https://openalex.org/C136356330","wikidata":"https://www.wikidata.org/wiki/Q156612","display_name":"Zero-sum game","level":3,"score":0.42559006810188293},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.40186846256256104},{"id":"https://openalex.org/C45473103","wikidata":"https://www.wikidata.org/wiki/Q851503","display_name":"Riccati equation","level":3,"score":0.38885053992271423},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.3735727071762085},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.36074256896972656},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3455261290073395},{"id":"https://openalex.org/C78045399","wikidata":"https://www.wikidata.org/wiki/Q11214","display_name":"Differential equation","level":2,"score":0.16022855043411255},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.13123542070388794},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.09331774711608887},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn.2019.8851866","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2019.8851866","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1614417283","https://openalex.org/W1863485266","https://openalex.org/W1968452087","https://openalex.org/W2005437559","https://openalex.org/W2020313577","https://openalex.org/W2024303516","https://openalex.org/W2052305027","https://openalex.org/W2055300044","https://openalex.org/W2067250995","https://openalex.org/W2068060907","https://openalex.org/W2068949505","https://openalex.org/W2085194340","https://openalex.org/W2102933137","https://openalex.org/W2108286682","https://openalex.org/W2115728676","https://openalex.org/W2183137222","https://openalex.org/W2475651303","https://openalex.org/W2484646121","https://openalex.org/W2573359556","https://openalex.org/W2585299106","https://openalex.org/W2620687756","https://openalex.org/W2735141570","https://openalex.org/W2789789879","https://openalex.org/W2793914020","https://openalex.org/W2803973384","https://openalex.org/W2918660012","https://openalex.org/W2919774326","https://openalex.org/W3150098721","https://openalex.org/W4235469235","https://openalex.org/W6750297707","https://openalex.org/W6751378828"],"related_works":["https://openalex.org/W2005019312","https://openalex.org/W2105459831","https://openalex.org/W1546579375","https://openalex.org/W1563296412","https://openalex.org/W2127238600","https://openalex.org/W2096105566","https://openalex.org/W2114791549","https://openalex.org/W16457797","https://openalex.org/W2792258377","https://openalex.org/W1530596433"],"abstract_inverted_index":{"In":[0,173],"this":[1],"paper,":[2],"we":[3],"consider":[4],"the":[5,19,26,39,44,52,70,78,82,87,94,100,111,116,138,151,170,179,183,192,196,199],"two-player":[6],"nonzero-sum":[7,58],"games":[8,21,59],"problem":[9,22],"for":[10],"continuous-time":[11],"linear":[12,45],"dynamic":[13,46],"systems.":[14],"It":[15,147],"is":[16,74,86,104,130,144,148,189],"shown":[17,149],"that":[18,150],"non-zero-sum":[20,83],"results":[23],"in":[24,160,175],"solving":[25],"coupled":[27,53,95],"algebraic":[28,34,40,54,96],"Riccati":[29,41,55,97],"equations,":[30],"which":[31,85],"are":[32,62],"nonlinear":[33],"matrix":[35],"equations.":[36,98],"Compared":[37],"with":[38,48,60],"equation":[42],"of":[43,57,81,110,137,169,198],"systems":[47],"only":[49,166],"one":[50],"player,":[51],"equations":[56],"multi-player":[61],"more":[63],"difficult":[64],"to":[65,76,92],"be":[66,158],"solved":[67],"directly.":[68],"First,":[69],"policy":[71,101],"iteration":[72,102,177],"algorithm":[73,103,143,156,188],"introduced":[75],"find":[77],"Nash":[79],"equilibrium":[80],"games,":[84],"sufficient":[88],"and":[89,106,164],"necessary":[90],"condition":[91],"solve":[93],"However,":[99],"offline":[105],"requires":[107,165],"complete":[108],"knowledge":[109,168],"system":[112,171],"dynamics.":[113,172],"To":[114],"overcome":[115],"above":[117],"issues,":[118],"a":[119],"novel":[120],"online":[121,162],"iterative":[122],"algorithm,":[123,129],"named":[124],"integral":[125,139,152,184],"temporal":[126,140,153,185],"difference":[127,141,154,186],"learning":[128,142,155,187],"developed.":[131],"Moreover,":[132],"an":[133,161],"equivalent":[134],"compact":[135],"form":[136],"also":[145],"presented.":[146],"can":[157],"implemented":[159],"fashion":[163],"partial":[167],"addition,":[174],"each":[176],"step,":[178],"closed-loop":[180],"stability":[181],"using":[182],"analyzed.":[190],"Finally,":[191],"simulation":[193],"study":[194],"shows":[195],"effectiveness":[197],"presented":[200],"algorithm.":[201]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
