{"id":"https://openalex.org/W2994724828","doi":"https://doi.org/10.1109/access.2019.2960064","title":"Data-Driven Nonzero-Sum Game for Discrete-Time Systems Using Off-Policy Reinforcement Learning","display_name":"Data-Driven Nonzero-Sum Game for Discrete-Time Systems Using Off-Policy Reinforcement Learning","publication_year":2019,"publication_date":"2019-12-16","ids":{"openalex":"https://openalex.org/W2994724828","doi":"https://doi.org/10.1109/access.2019.2960064","mag":"2994724828"},"language":"en","primary_location":{"id":"doi:10.1109/access.2019.2960064","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2019.2960064","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/08933509.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/08933509.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101763418","display_name":"Yongliang Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yongliang Yang","raw_affiliation_strings":["Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, University of Science and Technology Beijing, Beijing, China","School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100378893","display_name":"Sen Zhang","orcid":"https://orcid.org/0000-0002-8010-6045"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sen Zhang","raw_affiliation_strings":["Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, University of Science and Technology Beijing, Beijing, China","School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100459936","display_name":"Jie Dong","orcid":"https://orcid.org/0000-0001-7585-6637"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Dong","raw_affiliation_strings":["Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, University of Science and Technology Beijing, Beijing, China","School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101512101","display_name":"Yixin Yin","orcid":"https://orcid.org/0000-0001-6056-8878"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yixin Yin","raw_affiliation_strings":["Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, University of Science and Technology Beijing, Beijing, China","School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101763418"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.7976,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.7659321,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"8","issue":null,"first_page":"14074","last_page":"14088"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9793999791145325,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10040","display_name":"Adaptive Control of Nonlinear Systems","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8403527736663818},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6599586009979248},{"id":"https://openalex.org/keywords/discrete-time-and-continuous-time","display_name":"Discrete time and continuous time","score":0.47779661417007446},{"id":"https://openalex.org/keywords/game-theory","display_name":"Game theory","score":0.4739910364151001},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4150046408176422},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3777766823768616},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.24821656942367554},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19601109623908997},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13970601558685303}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8403527736663818},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6599586009979248},{"id":"https://openalex.org/C55689738","wikidata":"https://www.wikidata.org/wiki/Q15963867","display_name":"Discrete time and continuous time","level":2,"score":0.47779661417007446},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.4739910364151001},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4150046408176422},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3777766823768616},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.24821656942367554},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19601109623908997},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13970601558685303}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2019.2960064","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2019.2960064","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/08933509.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:80ea185393bd4d70ab7e3d29fb112364","is_oa":true,"landing_page_url":"https://doaj.org/article/80ea185393bd4d70ab7e3d29fb112364","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 8, Pp 14074-14088 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2019.2960064","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2019.2960064","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/08933509.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1023919524","display_name":null,"funder_award_id":", Grant","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1566696561","display_name":null,"funder_award_id":"61903028","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2135398491","display_name":null,"funder_award_id":"61873024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2375880139","display_name":null,"funder_award_id":"FRF-BD-19-002A","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G3189497561","display_name":null,"funder_award_id":"1873024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3863937708","display_name":null,"funder_award_id":"18-03","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5079729378","display_name":null,"funder_award_id":"61673055","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5205608926","display_name":null,"funder_award_id":"FRF-TP-18-031A1","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G5345354591","display_name":null,"funder_award_id":"61773053","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5523829882","display_name":null,"funder_award_id":"2018M641197","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8208342437","display_name":null,"funder_award_id":"1 and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320325365","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2994724828.pdf","grobid_xml":"https://content.openalex.org/works/W2994724828.grobid-xml"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W560518094","https://openalex.org/W602809265","https://openalex.org/W648594167","https://openalex.org/W1513689897","https://openalex.org/W1578630563","https://openalex.org/W1614417283","https://openalex.org/W1972243698","https://openalex.org/W1983523797","https://openalex.org/W2012451615","https://openalex.org/W2019338681","https://openalex.org/W2024890490","https://openalex.org/W2042413420","https://openalex.org/W2055300044","https://openalex.org/W2057784139","https://openalex.org/W2068328804","https://openalex.org/W2068949505","https://openalex.org/W2086977346","https://openalex.org/W2089924634","https://openalex.org/W2100276679","https://openalex.org/W2108383324","https://openalex.org/W2120038868","https://openalex.org/W2121863487","https://openalex.org/W2125256511","https://openalex.org/W2134024695","https://openalex.org/W2136064843","https://openalex.org/W2139416664","https://openalex.org/W2183137222","https://openalex.org/W2312229020","https://openalex.org/W2329769476","https://openalex.org/W2330024298","https://openalex.org/W2484646121","https://openalex.org/W2490503364","https://openalex.org/W2492008049","https://openalex.org/W2529203006","https://openalex.org/W2538000344","https://openalex.org/W2579592432","https://openalex.org/W2580629550","https://openalex.org/W2581606339","https://openalex.org/W2585299106","https://openalex.org/W2590862604","https://openalex.org/W2605603065","https://openalex.org/W2610857016","https://openalex.org/W2620687756","https://openalex.org/W2756081914","https://openalex.org/W2772589676","https://openalex.org/W2782656687","https://openalex.org/W2789789879","https://openalex.org/W2793914020","https://openalex.org/W2800584559","https://openalex.org/W2803973384","https://openalex.org/W2896799653","https://openalex.org/W2908615372","https://openalex.org/W2912270658","https://openalex.org/W2918660012","https://openalex.org/W2919774326","https://openalex.org/W2951650773","https://openalex.org/W2970427121","https://openalex.org/W4214717370","https://openalex.org/W4234761190","https://openalex.org/W6767180398"],"related_works":["https://openalex.org/W2742483371","https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W3087814763","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W4376605461","https://openalex.org/W4400868993","https://openalex.org/W2361647908","https://openalex.org/W2952356279"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"develop":[4],"a":[5,15,45,99],"data-driven":[6],"algorithm":[7,33],"to":[8,71,104],"learn":[9],"the":[10,35,50,57,61,73,92,95,106,109],"Nash":[11],"equilibrium":[12],"solution":[13],"for":[14,59],"two-player":[16,62],"non-zero-sum":[17],"(NZS)":[18],"game":[19],"with":[20],"completely":[21],"unknown":[22],"linear":[23],"discrete-time":[24],"dynamics":[25],"based":[26],"on":[27],"off-policy":[28,67,84],"reinforcement":[29],"learning":[30],"(RL).":[31],"This":[32],"solves":[34],"coupled":[36],"algebraic":[37],"Riccati":[38],"equations":[39],"(CARE)":[40],"forward":[41],"in":[42,44,89],"time":[43],"model-free":[46,66],"manner":[47],"by":[48],"using":[49],"online":[51],"measured":[52],"data.":[53],"We":[54],"first":[55],"derive":[56],"CARE":[58],"solving":[60],"NZS":[63],"game.":[64],"Then,":[65],"RL":[68,85],"is":[69,102],"developed":[70],"obviate":[72],"requirement":[74],"of":[75,78,91,108],"complete":[76],"knowledge":[77],"system":[79],"dynamics.":[80],"Besides,":[81],"on-":[82],"and":[83],"algorithms":[86],"are":[87],"compared":[88],"terms":[90],"robustness":[93],"against":[94],"probing":[96],"noise.":[97],"Finally,":[98],"simulation":[100],"example":[101],"presented":[103,110],"show":[105],"efficacy":[107],"approach.":[111]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
