{"id":"https://openalex.org/W4413847144","doi":"https://doi.org/10.1109/tsmc.2025.3600349","title":"Model-Free <i>Q</i>-Learning for Output Feedback Nash Strategy of Decentralized Nonzero-Sum Games","display_name":"Model-Free <i>Q</i>-Learning for Output Feedback Nash Strategy of Decentralized Nonzero-Sum Games","publication_year":2025,"publication_date":"2025-08-29","ids":{"openalex":"https://openalex.org/W4413847144","doi":"https://doi.org/10.1109/tsmc.2025.3600349"},"language":"en","primary_location":{"id":"doi:10.1109/tsmc.2025.3600349","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3600349","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007653946","display_name":"Qingchi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I80143920","display_name":"Shandong University of Science and Technology","ror":"https://ror.org/04gtjhw98","country_code":"CN","type":"education","lineage":["https://openalex.org/I80143920"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qiyan Zhang","raw_affiliation_strings":["College of Electrical Engineering and Automation, Shandong University of Science and Technology, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering and Automation, Shandong University of Science and Technology, Qingdao, China","institution_ids":["https://openalex.org/I80143920"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373283","display_name":"Hongxia Wang","orcid":"https://orcid.org/0000-0003-0024-8893"},"institutions":[{"id":"https://openalex.org/I80143920","display_name":"Shandong University of Science and Technology","ror":"https://ror.org/04gtjhw98","country_code":"CN","type":"education","lineage":["https://openalex.org/I80143920"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongxia Wang","raw_affiliation_strings":["College of Electrical Engineering and Automation, Shandong University of Science and Technology, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering and Automation, Shandong University of Science and Technology, Qingdao, China","institution_ids":["https://openalex.org/I80143920"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100723884","display_name":"Kai Peng","orcid":"https://orcid.org/0000-0001-9901-6448"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Peng","raw_affiliation_strings":["School of Power and Energy, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"School of Power and Energy, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114377900","display_name":"Huanshui Zhang","orcid":"https://orcid.org/0000-0002-8611-7327"},"institutions":[{"id":"https://openalex.org/I80143920","display_name":"Shandong University of Science and Technology","ror":"https://ror.org/04gtjhw98","country_code":"CN","type":"education","lineage":["https://openalex.org/I80143920"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huanshui Zhang","raw_affiliation_strings":["College of Electrical Engineering and Automation, Shandong University of Science and Technology, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Electrical Engineering and Automation, Shandong University of Science and Technology, Qingdao, China","institution_ids":["https://openalex.org/I80143920"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5007653946"],"corresponding_institution_ids":["https://openalex.org/I80143920"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2768614,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"55","issue":"11","first_page":"7900","last_page":"7910"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9787999987602234,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9787999987602234,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9764999747276306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9305999875068665,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.6754247546195984},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.6261417865753174},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.48526903986930847},{"id":"https://openalex.org/keywords/best-response","display_name":"Best response","score":0.4852156937122345},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4612537622451782},{"id":"https://openalex.org/keywords/epsilon-equilibrium","display_name":"Epsilon-equilibrium","score":0.45559313893318176},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3569357991218567},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3423318862915039},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.2902626395225525},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19272446632385254}],"concepts":[{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.6754247546195984},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.6261417865753174},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.48526903986930847},{"id":"https://openalex.org/C32407928","wikidata":"https://www.wikidata.org/wiki/Q2733833","display_name":"Best response","level":3,"score":0.4852156937122345},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4612537622451782},{"id":"https://openalex.org/C141824439","wikidata":"https://www.wikidata.org/wiki/Q307521","display_name":"Epsilon-equilibrium","level":4,"score":0.45559313893318176},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3569357991218567},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3423318862915039},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2902626395225525},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19272446632385254}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2025.3600349","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3600349","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1505311503","display_name":null,"funder_award_id":"Y2022-V-0002-0028","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G2214483057","display_name":null,"funder_award_id":"J2019-V-0010-0104","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G2605969510","display_name":null,"funder_award_id":"ZR2024MF045","funder_id":"https://openalex.org/F4320324174","funder_display_name":"Natural Science Foundation of Shandong Province"},{"id":"https://openalex.org/G4286352005","display_name":null,"funder_award_id":"20220019053002","funder_id":"https://openalex.org/F4320322857","funder_display_name":"Aeronautical Science Foundation of China"},{"id":"https://openalex.org/G8000135041","display_name":null,"funder_award_id":"51506176","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322857","display_name":"Aeronautical Science Foundation of China","ror":"https://ror.org/02wq41p38"},{"id":"https://openalex.org/F4320324174","display_name":"Natural Science Foundation of Shandong Province","ror":null},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W12218011","https://openalex.org/W1991888757","https://openalex.org/W2030740572","https://openalex.org/W2039250476","https://openalex.org/W2070988047","https://openalex.org/W2103171634","https://openalex.org/W2113846837","https://openalex.org/W2115118348","https://openalex.org/W2123827275","https://openalex.org/W2151966330","https://openalex.org/W2761656023","https://openalex.org/W2766053765","https://openalex.org/W2807082273","https://openalex.org/W2807176303","https://openalex.org/W2810037345","https://openalex.org/W2939438632","https://openalex.org/W2962856092","https://openalex.org/W2962990479","https://openalex.org/W3019048224","https://openalex.org/W3124040029","https://openalex.org/W3134387361","https://openalex.org/W3154503377","https://openalex.org/W3205079722","https://openalex.org/W4211232706","https://openalex.org/W4290717511","https://openalex.org/W4317548764","https://openalex.org/W4388666377","https://openalex.org/W4392058233"],"related_works":["https://openalex.org/W4302558842","https://openalex.org/W1554758635","https://openalex.org/W4389370903","https://openalex.org/W2282604764","https://openalex.org/W2803931294","https://openalex.org/W2500870930","https://openalex.org/W1721270588","https://openalex.org/W2010679956","https://openalex.org/W2955320052","https://openalex.org/W4248623114"],"abstract_inverted_index":{"In":[0],"this":[1,59],"article,":[2],"we":[3],"present":[4],"a":[5,61,126],"model-free":[6],"output":[7],"feedback":[8],"(OPFB)":[9],"<italic":[10,79],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[11,80],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Q</i>-learning":[12,81],"algorithm":[13,83],"to":[14,42,86,144,154],"find":[15],"the":[16,22,46,52,67,77,88,94,104,113,131,135,145,156,159],"optimal":[17,63,89,136],"Nash":[18,64],"equilibrium":[19,114],"strategy":[20,65],"for":[21],"decentralized":[23],"control":[24],"problem":[25],"(DCP)":[26],"of":[27,96,134,147,158],"nonzero-sum":[28],"games":[29],"with":[30],"asymmetric":[31,117],"information.":[32],"The":[33,109,139],"main":[34],"challenge":[35],"lies":[36],"in":[37,66],"different":[38],"historical":[39],"information":[40,48,54],"available":[41,97],"each":[43],"controller,":[44],"namely,":[45],"input":[47,98],"is":[49,55,70,84,111,120,142],"shared":[50],"while":[51],"measurement":[53,100],"private.":[56],"To":[57],"overcome":[58],"difficulty,":[60],"novel":[62],"input/output":[68],"form":[69],"derived":[71],"without":[72],"measurable":[73],"system":[74,105],"states.":[75,108],"Then,":[76],"OPFB":[78],"iteration":[82],"developed":[85],"learn":[87],"controllers":[90],"online":[91],"only":[92],"by":[93,122],"knowledge":[95],"and":[99,107],"information,":[101,118],"rather":[102],"than":[103],"dynamics":[106],"key":[110],"solving":[112],"equations":[115],"under":[116],"which":[119],"achieved":[121],"reformulating":[123],"them":[124],"into":[125],"constrained":[127],"minimization":[128],"problem,":[129],"yielding":[130],"numerical":[132],"solution":[133],"controller":[137],"pair.":[138],"presented":[140],"idea":[141],"new":[143],"best":[146],"authors\u2019":[148],"knowledge.":[149],"Numerical":[150],"examples":[151],"are":[152],"shown":[153],"illustrate":[155],"effectiveness":[157],"proposed":[160],"algorithm.":[161]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
