{"id":"https://openalex.org/W4398187772","doi":"https://doi.org/10.1109/tac.2024.3403693","title":"RL-ARNE: A Reinforcement Learning Algorithm for Computing Average Reward Nash Equilibrium of Nonzero-Sum Stochastic Games","display_name":"RL-ARNE: A Reinforcement Learning Algorithm for Computing Average Reward Nash Equilibrium of Nonzero-Sum Stochastic Games","publication_year":2024,"publication_date":"2024-05-21","ids":{"openalex":"https://openalex.org/W4398187772","doi":"https://doi.org/10.1109/tac.2024.3403693"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2024.3403693","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2024.3403693","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033433868","display_name":"Dinuka Sahabandu","orcid":"https://orcid.org/0000-0001-7776-7865"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dinuka Sahabandu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Washington, Seattle, WA, USA"],"raw_orcid":"https://orcid.org/0000-0001-7776-7865","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087007662","display_name":"Shana Moothedath","orcid":"https://orcid.org/0000-0001-6091-2384"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shana Moothedath","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Iowa State University, Ames, IA, USA"],"raw_orcid":"https://orcid.org/0000-0001-6091-2384","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007487580","display_name":"Joey Allen","orcid":"https://orcid.org/0000-0002-5503-4123"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I4210108451","display_name":"Palo Alto Networks (United States)","ror":"https://ror.org/01rn6rn86","country_code":"US","type":"company","lineage":["https://openalex.org/I4210108451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joey Allen","raw_affiliation_strings":["Palo Alto Networks, 3000 Tannery Way, Santa Clara, CA, USA","College of Computing, Georgia Institute of Technology, Atlanta, GA, USA"],"raw_orcid":"https://orcid.org/0000-0002-5503-4123","affiliations":[{"raw_affiliation_string":"Palo Alto Networks, 3000 Tannery Way, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210108451"]},{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003489427","display_name":"Linda Bushnell","orcid":"https://orcid.org/0000-0002-8751-2409"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Linda Bushnell","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Washington, Seattle, WA, USA"],"raw_orcid":"https://orcid.org/0000-0002-8751-2409","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047140382","display_name":"Wenke Lee","orcid":"https://orcid.org/0000-0003-2761-1277"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenke Lee","raw_affiliation_strings":["College of Computing, Georgia Institute of Technology, Atlanta, GA, USA"],"raw_orcid":"https://orcid.org/0000-0003-2761-1277","affiliations":[{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079723268","display_name":"Radha Poovendran","orcid":"https://orcid.org/0000-0003-0269-8097"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Radha Poovendran","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Washington, Seattle, WA, USA"],"raw_orcid":"https://orcid.org/0000-0003-0269-8097","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2987,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.81071558,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"69","issue":"11","first_page":"7824","last_page":"7831"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9740999937057495,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9740999937057495,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9340000152587891,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9329000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8245928287506104},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.7612317800521851},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.546488344669342},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4650837779045105},{"id":"https://openalex.org/keywords/best-response","display_name":"Best response","score":0.44302546977996826},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3702651858329773},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.348005473613739},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2563168406486511}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8245928287506104},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.7612317800521851},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.546488344669342},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4650837779045105},{"id":"https://openalex.org/C32407928","wikidata":"https://www.wikidata.org/wiki/Q2733833","display_name":"Best response","level":3,"score":0.44302546977996826},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3702651858329773},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.348005473613739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2563168406486511}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2024.3403693","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2024.3403693","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2685771227","display_name":null,"funder_award_id":"N00014-16-1-2710 P00002","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G6486092784","display_name":null,"funder_award_id":"FA8650-15-C-7556","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W594357522","https://openalex.org/W1496590343","https://openalex.org/W1566110434","https://openalex.org/W1607809226","https://openalex.org/W1756507684","https://openalex.org/W1968909445","https://openalex.org/W1983009109","https://openalex.org/W1998645260","https://openalex.org/W2002373723","https://openalex.org/W2007854530","https://openalex.org/W2057913812","https://openalex.org/W2062973696","https://openalex.org/W2067050450","https://openalex.org/W2083680012","https://openalex.org/W2094387729","https://openalex.org/W2120846115","https://openalex.org/W2132900772","https://openalex.org/W2142612324","https://openalex.org/W2235056388","https://openalex.org/W2463221887","https://openalex.org/W2766852928","https://openalex.org/W2962990479","https://openalex.org/W2991046523","https://openalex.org/W3176592993","https://openalex.org/W4205326910","https://openalex.org/W4233696721","https://openalex.org/W4254547512","https://openalex.org/W4254971495","https://openalex.org/W4287125173","https://openalex.org/W6631168379","https://openalex.org/W6633996991","https://openalex.org/W6678168664","https://openalex.org/W6678968918","https://openalex.org/W6773140934","https://openalex.org/W6774026043","https://openalex.org/W6796569726","https://openalex.org/W6797801250","https://openalex.org/W6803665415"],"related_works":["https://openalex.org/W2236801283","https://openalex.org/W2728657731","https://openalex.org/W1853631319","https://openalex.org/W2092374696","https://openalex.org/W2481143976","https://openalex.org/W2803932348","https://openalex.org/W3207342620","https://openalex.org/W2607684552","https://openalex.org/W2013767790","https://openalex.org/W4315489088"],"abstract_inverted_index":{"Stochastic":[0],"games":[1,68],"model":[2],"the":[3,25,37,41,47,54,63,79,85,105,137,147],"strategic":[4],"interactions":[5],"between":[6],"two":[7],"or":[8],"more":[9],"players":[10,48],"that":[11,53,61,101],"occur":[12],"in":[13,104],"a":[14,32,71,98,123,158],"sequence":[15],"of":[16,31,40,46,65,78,84,107,131,139,149],"stages.":[17],"In":[18,109],"this":[19,110],"paper":[20],"we":[21,112],"focus":[22],"on":[23,93,157],"computing":[24],"average":[26],"reward":[27,44],"Nash":[28],"equilibrium":[29],"(ARNE)":[30],"nonzero-sum":[33,66,132],"stochastic":[34,67,119,133],"game":[35,42,73,80,155],"when":[36],"transition":[38],"probabilities":[39],"and":[43,96,118],"structure":[45],"are":[49],"unknown.":[50],"We":[51,135,145],"note":[52],"current":[55],"state-of-the-art":[56],"reinforcement":[57],"learning":[58],"(RL)":[59],"algorithms":[60],"compute":[62,128],"ARNE":[64,130],"requires":[69],"solving":[70],"matrix":[72],"corresponding":[74],"to":[75,121,127,142],"each":[76],"state":[77],"at":[81],"every":[82],"iteration":[83],"algorithm,":[86],"which":[87],"is":[88,102],"PPAD":[89],"Polynomial":[90],"Parity":[91],"Arguments":[92],"Directed":[94],"graphs.PPAD-def-complete":[95],"incurs":[97],"memory":[99],"complexity":[100],"exponential":[103],"number":[106],"players.":[108],"paper,":[111],"use":[113],"temporal":[114],"difference":[115],"error":[116],"minimization":[117],"approximation":[120],"develop":[122],"scalable":[124],"RL":[125],"algorithm":[126,141,151],"an":[129,143,153],"games.":[134],"prove":[136],"convergence":[138],"our":[140,150],"ARNE.":[144],"evaluate":[146],"performance":[148],"using":[152],"attacker-defender":[154],"modeled":[156],"real-world":[159],"ransomware":[160],"dataset.":[161]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
