{"id":"https://openalex.org/W4391147933","doi":"https://doi.org/10.1109/tnnls.2024.3351631","title":"Deep Reinforcement Learning for Nash Equilibrium of Differential Games","display_name":"Deep Reinforcement Learning for Nash Equilibrium of Differential Games","publication_year":2024,"publication_date":"2024-01-23","ids":{"openalex":"https://openalex.org/W4391147933","doi":"https://doi.org/10.1109/tnnls.2024.3351631","pmid":"https://pubmed.ncbi.nlm.nih.gov/38261501"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2024.3351631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3351631","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029381846","display_name":"Zhenyu Li","orcid":"https://orcid.org/0000-0001-7249-962X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhenyu Li","raw_affiliation_strings":["College of Aerospace Science and Engineering, National University of Defense Technology, Changsha, China","Beijing Institute of Tracking and Telecommunications Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7249-962X","affiliations":[{"raw_affiliation_string":"College of Aerospace Science and Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Beijing Institute of Tracking and Telecommunications Technology, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100614737","display_name":"Ya-Zhong Luo","orcid":"https://orcid.org/0000-0003-0827-7736"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yazhong Luo","raw_affiliation_strings":["College of Aerospace Science and Engineering, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0003-0827-7736","affiliations":[{"raw_affiliation_string":"College of Aerospace Science and Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5029381846"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":6.8485,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.97183962,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"36","issue":"2","first_page":"2747","last_page":"2761"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9506999850273132,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.8110368251800537},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7533190250396729},{"id":"https://openalex.org/keywords/epsilon-equilibrium","display_name":"Epsilon-equilibrium","score":0.6650720238685608},{"id":"https://openalex.org/keywords/best-response","display_name":"Best response","score":0.6191537976264954},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6159456968307495},{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.6118207573890686},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5524324178695679},{"id":"https://openalex.org/keywords/correlated-equilibrium","display_name":"Correlated equilibrium","score":0.46653348207473755},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.4259776175022125},{"id":"https://openalex.org/keywords/equilibrium-selection","display_name":"Equilibrium selection","score":0.42515718936920166},{"id":"https://openalex.org/keywords/game-theory","display_name":"Game theory","score":0.3968789577484131},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.3649411201477051},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34446585178375244},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2945963144302368},{"id":"https://openalex.org/keywords/repeated-game","display_name":"Repeated game","score":0.22881048917770386},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.11957705020904541}],"concepts":[{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.8110368251800537},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7533190250396729},{"id":"https://openalex.org/C141824439","wikidata":"https://www.wikidata.org/wiki/Q307521","display_name":"Epsilon-equilibrium","level":4,"score":0.6650720238685608},{"id":"https://openalex.org/C32407928","wikidata":"https://www.wikidata.org/wiki/Q2733833","display_name":"Best response","level":3,"score":0.6191537976264954},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6159456968307495},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.6118207573890686},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5524324178695679},{"id":"https://openalex.org/C163630976","wikidata":"https://www.wikidata.org/wiki/Q964667","display_name":"Correlated equilibrium","level":5,"score":0.46653348207473755},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.4259776175022125},{"id":"https://openalex.org/C164407509","wikidata":"https://www.wikidata.org/wiki/Q5384490","display_name":"Equilibrium selection","level":4,"score":0.42515718936920166},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.3968789577484131},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.3649411201477051},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34446585178375244},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2945963144302368},{"id":"https://openalex.org/C202556891","wikidata":"https://www.wikidata.org/wiki/Q1584646","display_name":"Repeated game","level":3,"score":0.22881048917770386},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.11957705020904541}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2024.3351631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3351631","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38261501","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38261501","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G7951135745","display_name":null,"funder_award_id":"12125207","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1519369336","https://openalex.org/W1964757264","https://openalex.org/W2030730276","https://openalex.org/W2145339207","https://openalex.org/W2300738961","https://openalex.org/W2408768628","https://openalex.org/W2482860164","https://openalex.org/W2588293788","https://openalex.org/W2744663568","https://openalex.org/W2749807327","https://openalex.org/W2757507533","https://openalex.org/W2766447205","https://openalex.org/W2777247237","https://openalex.org/W2798500587","https://openalex.org/W2808328343","https://openalex.org/W2884713680","https://openalex.org/W2912831137","https://openalex.org/W2933207051","https://openalex.org/W2945580732","https://openalex.org/W2964106644","https://openalex.org/W2964164283","https://openalex.org/W2964340928","https://openalex.org/W2972302997","https://openalex.org/W2982316857","https://openalex.org/W3038610377","https://openalex.org/W3042974336","https://openalex.org/W3090037761","https://openalex.org/W3110979110","https://openalex.org/W3117938601","https://openalex.org/W3156295478","https://openalex.org/W3161844359","https://openalex.org/W3187550742","https://openalex.org/W4205326910","https://openalex.org/W4214717370","https://openalex.org/W4280642569","https://openalex.org/W4283365574","https://openalex.org/W4294891545","https://openalex.org/W4298857966","https://openalex.org/W4302033101","https://openalex.org/W4307230233","https://openalex.org/W6637967152","https://openalex.org/W6681187623","https://openalex.org/W6683300800","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6696772115","https://openalex.org/W6738546315","https://openalex.org/W6743756900","https://openalex.org/W6747791040","https://openalex.org/W6750645735"],"related_works":["https://openalex.org/W2013767790","https://openalex.org/W4315489088","https://openalex.org/W4301347782","https://openalex.org/W4299551322","https://openalex.org/W1853631319","https://openalex.org/W1667857601","https://openalex.org/W4221157020","https://openalex.org/W3125812435","https://openalex.org/W4302558842","https://openalex.org/W1554758635"],"abstract_inverted_index":{"Nash":[0,28,155,165],"equilibrium":[1,29,156,166],"is":[2,48,64,87,101,112,184],"a":[3,49,57,121,135,143],"significant":[4],"solution":[5],"concept":[6],"representing":[7],"the":[8,27,38,78,91,94,102,127,130,154,164],"optimal":[9],"strategy":[10],"in":[11,157,172],"an":[12,72],"uncooperative":[13],"multiagent":[14],"system.":[15],"This":[16],"study":[17],"presents":[18],"two":[19,131],"deep":[20,41,104],"reinforcement":[21],"learning":[22,51,60,123],"(DRL)":[23],"algorithms":[24,34,151],"for":[25,66,186],"solving":[26],"of":[30,93,129],"differential":[31],"games.":[32,174],"Both":[33],"are":[35],"built":[36],"upon":[37],"distributed":[39],"distributional":[40,103],"deterministic":[42,105],"policy":[43,82,107,137,145],"gradient":[44,83,108,138],"(D4PG)":[45],"algorithm,":[46,110],"which":[47,69,111,183],"one-sided":[50],"method.":[52,61],"We":[53],"modified":[54],"it":[55,118,176],"to":[56,89,141,153],"two-sided":[58],"adversarial":[59,73],"The":[62,99],"first":[63],"D4PG":[65],"games":[67,179],"(D4P2G),":[68],"directly":[70],"applies":[71],"play":[74],"framework":[75,124],"based":[76],"on":[77],"D4PG.":[79],"A":[80],"simultaneous":[81],"descent":[84],"(SPGD)":[85],"method":[86,140],"employed":[88],"optimize":[90],"policies":[92],"players":[95,132],"with":[96,180],"conflicting":[97],"objectives.":[98],"second":[100],"symplectic":[106,136],"(D4SPG)":[109],"our":[113],"main":[114],"contribution.":[115],"More":[116],"specifically,":[117],"newly":[119],"designs":[120],"minimax":[122],"that":[125,149],"combines":[126],"critics":[128],"and":[133,169],"proposes":[134],"adjustment":[139],"find":[142],"better":[144],"gradient.":[146],"Simulations":[147],"show":[148],"both":[150],"converge":[152],"most":[158],"cases,":[159],"but":[160],"D4SPG":[161],"can":[162,177],"learn":[163],"more":[167],"accurately":[168],"efficiently,":[170],"especially":[171],"Hamiltonian":[173],"Moreover,":[175],"handle":[178],"complex":[181],"dynamics,":[182],"challenging":[185],"traditional":[187],"methods.":[188]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-31T08:46:17.908082","created_date":"2025-10-10T00:00:00"}
