{"id":"https://openalex.org/W4391215774","doi":"https://doi.org/10.1109/tcsii.2024.3358676","title":"Primal-Dual Reinforcement Learning for Zero-Sum Games in the Optimal Tracking Control","display_name":"Primal-Dual Reinforcement Learning for Zero-Sum Games in the Optimal Tracking Control","publication_year":2024,"publication_date":"2024-01-25","ids":{"openalex":"https://openalex.org/W4391215774","doi":"https://doi.org/10.1109/tcsii.2024.3358676"},"language":"en","primary_location":{"id":"doi:10.1109/tcsii.2024.3358676","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/tcsii.2024.3358676","pdf_url":null,"source":{"id":"https://openalex.org/S93916849","display_name":"IEEE Transactions on Circuits & Systems II Express Briefs","issn_l":"1549-7747","issn":["1549-7747","1558-3791"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems II: Express Briefs","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113104326","display_name":"Xuejie Que","orcid":null},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuejie Que","raw_affiliation_strings":["Key Laboratory of Smart Manufacturing in Energy Chemical Process, Ministry of Education, East China University of Science and Technology, Shanghai, China","Ministry of Education, Key Laboratory of Smart Manufacturing in Energy Chemical Process, East China University of Science and Technology, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Smart Manufacturing in Energy Chemical Process, Ministry of Education, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]},{"raw_affiliation_string":"Ministry of Education, Key Laboratory of Smart Manufacturing in Energy Chemical Process, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101906980","display_name":"Zhenlei Wang","orcid":"https://orcid.org/0000-0002-6300-9286"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenlei Wang","raw_affiliation_strings":["Key Laboratory of Smart Manufacturing in Energy Chemical Process, Ministry of Education, East China University of Science and Technology, Shanghai, China","Ministry of Education, Key Laboratory of Smart Manufacturing in Energy Chemical Process, East China University of Science and Technology, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Smart Manufacturing in Energy Chemical Process, Ministry of Education, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]},{"raw_affiliation_string":"Ministry of Education, Key Laboratory of Smart Manufacturing in Energy Chemical Process, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5113104326"],"corresponding_institution_ids":["https://openalex.org/I143593769"],"apc_list":null,"apc_paid":null,"fwci":0.3554,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.55988815,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"71","issue":"6","first_page":"3146","last_page":"3150"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10675","display_name":"Mechanical Circulatory Support Devices","score":0.9466999769210815,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8336993455886841},{"id":"https://openalex.org/keywords/zero-sum-game","display_name":"Zero-sum game","score":0.6565275192260742},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6365307569503784},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.6202025413513184},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.6108239889144897},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5351427793502808},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5313093662261963},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.5062195658683777},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.48571476340293884},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.45154497027397156},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.44081199169158936},{"id":"https://openalex.org/keywords/regular-polygon","display_name":"Regular polygon","score":0.4277487099170685},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.42166346311569214},{"id":"https://openalex.org/keywords/tracking-error","display_name":"Tracking error","score":0.4174846112728119},{"id":"https://openalex.org/keywords/convex-optimization","display_name":"Convex optimization","score":0.4108036160469055},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.40169307589530945},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.3676127791404724},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.34980058670043945},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19476699829101562}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8336993455886841},{"id":"https://openalex.org/C136356330","wikidata":"https://www.wikidata.org/wiki/Q156612","display_name":"Zero-sum game","level":3,"score":0.6565275192260742},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6365307569503784},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.6202025413513184},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.6108239889144897},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5351427793502808},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5313093662261963},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.5062195658683777},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.48571476340293884},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.45154497027397156},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.44081199169158936},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.4277487099170685},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.42166346311569214},{"id":"https://openalex.org/C183356978","wikidata":"https://www.wikidata.org/wiki/Q1779213","display_name":"Tracking error","level":3,"score":0.4174846112728119},{"id":"https://openalex.org/C157972887","wikidata":"https://www.wikidata.org/wiki/Q463359","display_name":"Convex optimization","level":3,"score":0.4108036160469055},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.40169307589530945},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.3676127791404724},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.34980058670043945},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19476699829101562},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsii.2024.3358676","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/tcsii.2024.3358676","pdf_url":null,"source":{"id":"https://openalex.org/S93916849","display_name":"IEEE Transactions on Circuits & Systems II Express Briefs","issn_l":"1549-7747","issn":["1549-7747","1558-3791"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems II: Express Briefs","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G3001630108","display_name":null,"funder_award_id":"62373153","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5789780205","display_name":null,"funder_award_id":"61988101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7355380096","display_name":null,"funder_award_id":"62293501","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1977788021","https://openalex.org/W2005437559","https://openalex.org/W2580629550","https://openalex.org/W2769087263","https://openalex.org/W2903410452","https://openalex.org/W2997938989","https://openalex.org/W3008282592","https://openalex.org/W3110708742","https://openalex.org/W3127275112","https://openalex.org/W3127834470","https://openalex.org/W3169248275","https://openalex.org/W3200354666","https://openalex.org/W3201890070","https://openalex.org/W3205613298","https://openalex.org/W3210282889","https://openalex.org/W4200561738","https://openalex.org/W4223502737","https://openalex.org/W4226343901","https://openalex.org/W4250589301","https://openalex.org/W4293194127","https://openalex.org/W4312805937","https://openalex.org/W4384284100"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W3011680666","https://openalex.org/W3136378956","https://openalex.org/W4287871712","https://openalex.org/W3006995042","https://openalex.org/W3004964213","https://openalex.org/W2885522335","https://openalex.org/W2128266000","https://openalex.org/W2236801283"],"abstract_inverted_index":{"The":[0,45],"two-player":[1],"zero-sum":[2],"game":[3],"method":[4,102],"for":[5,71,76,93],"solving":[6],"optimal":[7],"tracking":[8,107],"problems":[9],"with":[10],"external":[11],"disturbance":[12],"has":[13],"been":[14],"extensively":[15],"explored.":[16],"However,":[17],"challenges":[18],"such":[19],"as":[20,81],"the":[21,31,34,39,69,98],"selection":[22],"of":[23,33,59],"initial":[24,62],"admissible":[25],"policies":[26,75],"and":[27,85],"learning":[28,50,101],"errors":[29],"diminish":[30],"accuracy":[32],"Nash":[35,66],"equilibrium,":[36],"even":[37],"limiting":[38],"method\u2019s":[40],"application":[41],"to":[42,64],"some":[43],"extent.":[44],"proposed":[46,99],"model-free":[47],"primal-dual":[48,89],"reinforcement":[49],"algorithm":[51],"utilizes":[52],"state-input":[53],"trajectories":[54],"generated":[55],"by":[56],"a":[57,82,88,112],"set":[58],"linearly":[60],"independent":[61],"vectors":[63],"obtain":[65],"equilibrium":[67],"without":[68],"need":[70],"probing":[72],"noise.":[73],"Admissible":[74],"both":[77],"players":[78],"are":[79],"treated":[80],"non-convex":[83],"constraint":[84],"solved":[86],"from":[87],"perspective.":[90],"Simulation":[91],"results":[92],"an":[94],"inverter":[95],"confirm":[96],"that":[97],"unbiased":[100],"not":[103],"only":[104],"exhibits":[105],"superior":[106],"performance":[108],"but":[109],"also":[110],"demonstrates":[111],"faster":[113],"convergence":[114],"speed.":[115]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
