{"id":"https://openalex.org/W2097647952","doi":"https://doi.org/10.1109/cdc.2010.5717607","title":"Online solution of nonlinear two-player zero-sum games using synchronous policy iteration","display_name":"Online solution of nonlinear two-player zero-sum games using synchronous policy iteration","publication_year":2010,"publication_date":"2010-12-01","ids":{"openalex":"https://openalex.org/W2097647952","doi":"https://doi.org/10.1109/cdc.2010.5717607","mag":"2097647952"},"language":"en","primary_location":{"id":"doi:10.1109/cdc.2010.5717607","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc.2010.5717607","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"49th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040301558","display_name":"Kyriakos G. Vamvoudakis","orcid":"https://orcid.org/0000-0003-1978-4848"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]},{"id":"https://openalex.org/I4210116723","display_name":"Robotics Research (United States)","ror":"https://ror.org/020w2fr77","country_code":"US","type":"company","lineage":["https://openalex.org/I4210116723"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kyriakos G. Vamvoudakis","raw_affiliation_strings":["Automation and Robotics Research Institute, University of Texas, Arlington, Fort Worth, TX, USA","Automation and Robotics Research Institute, University of Texas at Arlington, 7300 Jack Newell Blvd. S. Fort Worth, 76118 USA"],"affiliations":[{"raw_affiliation_string":"Automation and Robotics Research Institute, University of Texas, Arlington, Fort Worth, TX, USA","institution_ids":["https://openalex.org/I4210116723"]},{"raw_affiliation_string":"Automation and Robotics Research Institute, University of Texas at Arlington, 7300 Jack Newell Blvd. S. Fort Worth, 76118 USA","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016137188","display_name":"Frank L. Lewis","orcid":"https://orcid.org/0000-0003-4074-1615"},"institutions":[{"id":"https://openalex.org/I4210116723","display_name":"Robotics Research (United States)","ror":"https://ror.org/020w2fr77","country_code":"US","type":"company","lineage":["https://openalex.org/I4210116723"]},{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"F.L. Lewis","raw_affiliation_strings":["Automation and Robotics Research Institute, University of Texas, Arlington, Fort Worth, TX, USA","Automation and Robotics Research Institute, University of Texas at Arlington, 7300 Jack Newell Blvd. S. Fort Worth, 76118 USA"],"affiliations":[{"raw_affiliation_string":"Automation and Robotics Research Institute, University of Texas, Arlington, Fort Worth, TX, USA","institution_ids":["https://openalex.org/I4210116723"]},{"raw_affiliation_string":"Automation and Robotics Research Institute, University of Texas at Arlington, 7300 Jack Newell Blvd. S. Fort Worth, 76118 USA","institution_ids":["https://openalex.org/I189196454"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5040301558"],"corresponding_institution_ids":["https://openalex.org/I189196454","https://openalex.org/I4210116723"],"apc_list":null,"apc_paid":null,"fwci":1.3381,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.81361771,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3040","last_page":"3047"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10040","display_name":"Adaptive Control of Nonlinear Systems","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/saddle-point","display_name":"Saddle point","score":0.6470255851745605},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.624496579170227},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5838429927825928},{"id":"https://openalex.org/keywords/zero-sum-game","display_name":"Zero-sum game","score":0.5836724638938904},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.5649422407150269},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5252066254615784},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5202259421348572},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.4823593497276306},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.4596012234687805},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.4296574592590332},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.4255185127258301},{"id":"https://openalex.org/keywords/saddle","display_name":"Saddle","score":0.414569228887558},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3069308400154114},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.27210408449172974},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.25287848711013794},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.09472984075546265}],"concepts":[{"id":"https://openalex.org/C2681867","wikidata":"https://www.wikidata.org/wiki/Q690935","display_name":"Saddle point","level":2,"score":0.6470255851745605},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.624496579170227},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5838429927825928},{"id":"https://openalex.org/C136356330","wikidata":"https://www.wikidata.org/wiki/Q156612","display_name":"Zero-sum game","level":3,"score":0.5836724638938904},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.5649422407150269},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5252066254615784},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5202259421348572},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.4823593497276306},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.4596012234687805},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.4296574592590332},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.4255185127258301},{"id":"https://openalex.org/C2777127463","wikidata":"https://www.wikidata.org/wiki/Q10862618","display_name":"Saddle","level":2,"score":0.414569228887558},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3069308400154114},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.27210408449172974},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.25287848711013794},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.09472984075546265},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc.2010.5717607","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc.2010.5717607","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"49th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W5108103","https://openalex.org/W601351931","https://openalex.org/W648594167","https://openalex.org/W1487586009","https://openalex.org/W1517236425","https://openalex.org/W1576452626","https://openalex.org/W1578630563","https://openalex.org/W1602326725","https://openalex.org/W1606119439","https://openalex.org/W1854776945","https://openalex.org/W1983523797","https://openalex.org/W1990401127","https://openalex.org/W2027197837","https://openalex.org/W2028145673","https://openalex.org/W2037025184","https://openalex.org/W2108286682","https://openalex.org/W2109640889","https://openalex.org/W2116479681","https://openalex.org/W2116508132","https://openalex.org/W2116630766","https://openalex.org/W2121863487","https://openalex.org/W2132468772","https://openalex.org/W2134024695","https://openalex.org/W2164465592","https://openalex.org/W2321292752","https://openalex.org/W2498694880","https://openalex.org/W3043164720","https://openalex.org/W3121926921","https://openalex.org/W3213472335","https://openalex.org/W4214717370","https://openalex.org/W6600197225","https://openalex.org/W6634494593","https://openalex.org/W6781357533","https://openalex.org/W6804409665"],"related_works":["https://openalex.org/W4236459141","https://openalex.org/W4205304778","https://openalex.org/W2020252434","https://openalex.org/W73248859","https://openalex.org/W2584253892","https://openalex.org/W2350324449","https://openalex.org/W1572705989","https://openalex.org/W119381072","https://openalex.org/W2077432297","https://openalex.org/W2078184820"],"abstract_inverted_index":{"In":[0],"this":[1,96],"paper":[2],"we":[3],"present":[4],"an":[5,78],"online":[6,36,97],"gaming":[7,98],"algorithm":[8,34,74,99],"based":[9],"on":[10],"policy":[11,63,103],"iteration":[12],"to":[13,41,112,118,137],"solve":[14],"the":[15,33,39,42,55,59,116,119,138,148,156,159],"continuous-time":[16,84],"(CT)":[17],"two-player":[18],"zero-sum":[19,101],"game":[20,43,102],"with":[21,28],"infinite":[22],"horizon":[23],"cost":[24],"for":[25,129],"nonlinear":[26],"systems":[27],"known":[29],"dynamics.":[30],"That":[31],"is,":[32],"learns":[35],"in":[37,50],"real-time":[38,51],"solution":[40,142],"design":[44],"HJI":[45],"equation.":[46],"This":[47],"method":[48],"finds":[49],"suitable":[52],"approximations":[53],"of":[54,86,107,115,147,158],"optimal":[56,121,139],"value,":[57],"and":[58,64,90,132,145],"saddle":[60,140],"point":[61,141],"control":[62,88],"disturbance":[65,91,133],"policy,":[66],"while":[67],"also":[68,151],"guaranteeing":[69],"closed-loop":[70],"stability.":[71],"The":[72,135],"adaptive":[73],"is":[75,110,143,150],"implemented":[76],"as":[77],"actor/critic":[79],"structure":[80],"which":[81],"involves":[82],"simultaneous":[83],"adaptation":[85],"critic,":[87,130],"actor,":[89],"neural":[92],"networks.":[93,134],"We":[94],"call":[95],"`synchronous'":[100],"iteration.":[104],"A":[105],"persistence":[106],"excitation":[108],"condition":[109],"shown":[111],"guarantee":[113],"convergence":[114,136],"critic":[117],"actual":[120],"value":[122],"function.":[123],"Novel":[124],"tuning":[125],"algorithms":[126],"are":[127],"given":[128],"actor":[131],"proven,":[144],"stability":[146],"system":[149],"guaranteed.":[152],"Simulation":[153],"examples":[154],"show":[155],"effectiveness":[157],"new":[160],"algorithm.":[161]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":4},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}