{"id":"https://openalex.org/W4394842928","doi":"https://doi.org/10.1109/tsmc.2024.3382356","title":"Adaptive Optimal Control via Continuous-Time <i>Q</i>-Learning for Stackelberg\u2013Nash Games of Uncertain Nonlinear Systems","display_name":"Adaptive Optimal Control via Continuous-Time <i>Q</i>-Learning for Stackelberg\u2013Nash Games of Uncertain Nonlinear Systems","publication_year":2024,"publication_date":"2024-04-16","ids":{"openalex":"https://openalex.org/W4394842928","doi":"https://doi.org/10.1109/tsmc.2024.3382356"},"language":"en","primary_location":{"id":"doi:10.1109/tsmc.2024.3382356","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2024.3382356","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090191942","display_name":"Shuhang Yu","orcid":"https://orcid.org/0000-0001-9928-8232"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuhang Yu","raw_affiliation_strings":["School of Information Science and Engineering, Northeastern University, Shenyang, Liaoning, China"],"raw_orcid":"https://orcid.org/0000-0001-9928-8232","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Northeastern University, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100625792","display_name":"Huaguang Zhang","orcid":"https://orcid.org/0000-0002-2375-9824"},"institutions":[{"id":"https://openalex.org/I4391767858","display_name":"State Key Laboratory of Synthetical Automation for Process Industries","ror":"https://ror.org/0380ng272","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391767858","https://openalex.org/I9224756"]},{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaguang Zhang","raw_affiliation_strings":["State Key Laboratory of Synthetical Automation for Process Industries and the School of Information Science and Engineering, Northeastern University, Shenyang, Liaoning, China"],"raw_orcid":"https://orcid.org/0000-0002-2375-9824","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Synthetical Automation for Process Industries and the School of Information Science and Engineering, Northeastern University, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I9224756","https://openalex.org/I4391767858"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010477724","display_name":"Zhongyang Ming","orcid":"https://orcid.org/0009-0001-1002-6840"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongyang Ming","raw_affiliation_strings":["School of Information Science and Engineering, Northeastern University, Shenyang, Liaoning, China"],"raw_orcid":"https://orcid.org/0009-0001-1002-6840","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Northeastern University, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002903770","display_name":"Jiayue Sun","orcid":"https://orcid.org/0000-0002-7774-2606"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayue Sun","raw_affiliation_strings":["School of Information Science and Engineering, Northeastern University, Shenyang, Liaoning, China"],"raw_orcid":"https://orcid.org/0000-0002-7774-2606","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Northeastern University, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I9224756"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.4325,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.95072491,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"54","issue":"7","first_page":"4461","last_page":"4470"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10675","display_name":"Mechanical Circulatory Support Devices","score":0.9659000039100647,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyapunov-function","display_name":"Lyapunov function","score":0.653167724609375},{"id":"https://openalex.org/keywords/stackelberg-competition","display_name":"Stackelberg competition","score":0.6182234883308411},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.5588640570640564},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.5167945623397827},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.5134197473526001},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4797321856021881},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47295281291007996},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.44874095916748047},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.44555121660232544},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.4207095503807068},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3937687873840332},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.30564332008361816},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.2190965712070465},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.10805553197860718}],"concepts":[{"id":"https://openalex.org/C60640748","wikidata":"https://www.wikidata.org/wiki/Q2337858","display_name":"Lyapunov function","level":3,"score":0.653167724609375},{"id":"https://openalex.org/C199510392","wikidata":"https://www.wikidata.org/wiki/Q1184602","display_name":"Stackelberg competition","level":2,"score":0.6182234883308411},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5588640570640564},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.5167945623397827},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.5134197473526001},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4797321856021881},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47295281291007996},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.44874095916748047},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.44555121660232544},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.4207095503807068},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3937687873840332},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.30564332008361816},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.2190965712070465},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.10805553197860718},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2024.3382356","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2024.3382356","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2034784894","display_name":null,"funder_award_id":"U23B20118","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G837822842","display_name":null,"funder_award_id":"XLYC1801005","funder_id":"https://openalex.org/F4320329895","funder_display_name":"Liaoning Revitalization Talents Program"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329895","display_name":"Liaoning Revitalization Talents Program","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1968908471","https://openalex.org/W1997603681","https://openalex.org/W2013895638","https://openalex.org/W2020688906","https://openalex.org/W2052305027","https://openalex.org/W2079726633","https://openalex.org/W2081514674","https://openalex.org/W2100276679","https://openalex.org/W2344109271","https://openalex.org/W2484646121","https://openalex.org/W2564717627","https://openalex.org/W2585472245","https://openalex.org/W2998579696","https://openalex.org/W3021478157","https://openalex.org/W3084233102","https://openalex.org/W3096135216","https://openalex.org/W3135757144","https://openalex.org/W3153781554","https://openalex.org/W3195322794","https://openalex.org/W3196960188","https://openalex.org/W4205561196","https://openalex.org/W4285505818","https://openalex.org/W4290717511","https://openalex.org/W4309366246","https://openalex.org/W4312721382","https://openalex.org/W4313124745","https://openalex.org/W4327522695"],"related_works":["https://openalex.org/W2976114880","https://openalex.org/W2328458106","https://openalex.org/W2803931294","https://openalex.org/W2282604764","https://openalex.org/W2569414037","https://openalex.org/W2500870930","https://openalex.org/W2218535193","https://openalex.org/W4389370903","https://openalex.org/W2468739564","https://openalex.org/W1554758635"],"abstract_inverted_index":{"In":[0],"order":[1,92],"to":[2,59,93,114,142],"solve":[3],"the":[4,11,47,51,60,74,100,108,116,119,125,144,147],"two-player":[5],"Stackelberg":[6],"differential":[7],"game":[8],"(SDG)":[9],"for":[10,50,103],"continuous-time":[12],"nonlinear":[13,105,122],"Markov":[14],"jump":[15],"system":[16,44],"(MJS),":[17],"this":[18],"article":[19],"defines":[20],"a":[21,32,79,137],"unique":[22],"<inline-formula":[23,82],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[24,83],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[25,84],"<tex-math":[26,85],"notation=\"LaTeX\">$Q$</tex-math>":[27,86],"</inline-formula>":[28,87],"-function":[29,88],"and":[30,53,62],"suggests":[31],"novel":[33,80],"adaptive":[34],"dynamic":[35],"programming":[36],"(ADP)":[37],"method":[38],"which":[39,98],"is":[40,64,89,99,112,140],"completely":[41,95],"independent":[42],"of":[43,118,146],"information.":[45],"First,":[46],"optimal":[48],"policies":[49,69],"leader":[52],"follower":[54],"are":[55,70],"determined":[56],"from":[57],"down":[58],"top,":[61],"it":[63],"further":[65],"demonstrated":[66],"that":[67],"these":[68],"what":[71],"make":[72],"up":[73],"Stackelberg\u2013Nash":[75],"equilibrium":[76],"point.":[77],"Then,":[78],"action-dependent":[81],"established":[90],"in":[91],"attain":[94],"model-free":[96],"learning,":[97],"first":[101],"attempt":[102],"SDG-based":[104],"MJS.":[106],"Furthermore,":[107],"Lyapunov":[109],"direct":[110],"approach":[111],"employed":[113],"guarantee":[115],"stability":[117],"closed-loop":[120],"uncertain":[121],"MJS":[123],"under":[124],"control":[126,150],"scheme":[127],"based":[128],"on":[129],"ADP,":[130],"ensuring":[131],"uniform":[132],"ultimate":[133],"boundedness":[134],"(UUB).":[135],"Ultimately,":[136],"numerical":[138],"simulation":[139],"presented":[141],"validate":[143],"efficacy":[145],"aforementioned":[148],"ADP-based":[149],"approach.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-18T10:00:31.954636","created_date":"2025-10-10T00:00:00"}