{"id":"https://openalex.org/W4394994647","doi":"https://doi.org/10.1109/tnnls.2024.3387871","title":"Expected Policy Gradient for Network Aggregative Markov Games in Continuous Space","display_name":"Expected Policy Gradient for Network Aggregative Markov Games in Continuous Space","publication_year":2024,"publication_date":"2024-04-22","ids":{"openalex":"https://openalex.org/W4394994647","doi":"https://doi.org/10.1109/tnnls.2024.3387871","pmid":"https://pubmed.ncbi.nlm.nih.gov/38648129"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2024.3387871","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3387871","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039476230","display_name":"Alireza Ramezani Moghaddam","orcid":null},"institutions":[{"id":"https://openalex.org/I23946033","display_name":"University of Tehran","ror":"https://ror.org/05vf56z40","country_code":"IR","type":"education","lineage":["https://openalex.org/I23946033"]}],"countries":["IR"],"is_corresponding":true,"raw_author_name":"Alireza Ramezani Moghaddam","raw_affiliation_strings":["School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran","College of Engineering, School of Electrical and Computer Engineering, University of Tehran, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran","institution_ids":["https://openalex.org/I23946033"]},{"raw_affiliation_string":"College of Engineering, School of Electrical and Computer Engineering, University of Tehran, Tehran, Iran","institution_ids":["https://openalex.org/I23946033"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081002970","display_name":"Hamed Kebriaei","orcid":"https://orcid.org/0000-0002-3781-2163"},"institutions":[{"id":"https://openalex.org/I23946033","display_name":"University of Tehran","ror":"https://ror.org/05vf56z40","country_code":"IR","type":"education","lineage":["https://openalex.org/I23946033"]},{"id":"https://openalex.org/I4210146419","display_name":"Institute for Research in Fundamental Sciences","ror":"https://ror.org/04xreqs31","country_code":"IR","type":"facility","lineage":["https://openalex.org/I4210146419"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Hamed Kebriaei","raw_affiliation_strings":["School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran","College of Engineering, School of Electrical and Computer Engineering, University of Tehran, Tehran, Iran","School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran","institution_ids":["https://openalex.org/I23946033"]},{"raw_affiliation_string":"College of Engineering, School of Electrical and Computer Engineering, University of Tehran, Tehran, Iran","institution_ids":["https://openalex.org/I23946033"]},{"raw_affiliation_string":"School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Tehran, Iran","institution_ids":["https://openalex.org/I4210146419"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5039476230"],"corresponding_institution_ids":["https://openalex.org/I23946033"],"apc_list":null,"apc_paid":null,"fwci":0.7305,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.69452815,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"36","issue":"4","first_page":"7372","last_page":"7381"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.8723999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.8723999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10768","display_name":"Electric Vehicles and Infrastructure","score":0.8303999900817871,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7317000031471252,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7793262600898743},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.7069492936134338},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6618829369544983},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6502757668495178},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.636982262134552},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5195801258087158},{"id":"https://openalex.org/keywords/best-response","display_name":"Best response","score":0.4742516577243805},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4729801416397095},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.455120325088501},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.45417141914367676},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.35749876499176025},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2349216341972351},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10648095607757568},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.07960367202758789}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7793262600898743},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.7069492936134338},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6618829369544983},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6502757668495178},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.636982262134552},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5195801258087158},{"id":"https://openalex.org/C32407928","wikidata":"https://www.wikidata.org/wiki/Q2733833","display_name":"Best response","level":3,"score":0.4742516577243805},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4729801416397095},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.455120325088501},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.45417141914367676},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.35749876499176025},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2349216341972351},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10648095607757568},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.07960367202758789},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2024.3387871","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3387871","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38648129","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38648129","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3091915619","display_name":null,"funder_award_id":"CS 1402-4-208","funder_id":"https://openalex.org/F4320323645","funder_display_name":"Institute for Research in Fundamental Sciences"}],"funders":[{"id":"https://openalex.org/F4320323645","display_name":"Institute for Research in Fundamental Sciences","ror":"https://ror.org/04xreqs31"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1568229137","https://openalex.org/W1583567191","https://openalex.org/W1972221945","https://openalex.org/W2024446871","https://openalex.org/W2094364653","https://openalex.org/W2148437632","https://openalex.org/W2289072378","https://openalex.org/W2545853213","https://openalex.org/W2751692638","https://openalex.org/W2902676408","https://openalex.org/W2913326990","https://openalex.org/W2954232543","https://openalex.org/W2964337555","https://openalex.org/W2996568167","https://openalex.org/W3005549036","https://openalex.org/W3013559981","https://openalex.org/W3020058381","https://openalex.org/W3036329728","https://openalex.org/W3046559364","https://openalex.org/W3143514670","https://openalex.org/W3153119964","https://openalex.org/W3174070171","https://openalex.org/W3197820042","https://openalex.org/W4212814156","https://openalex.org/W4244860029","https://openalex.org/W4280642569","https://openalex.org/W4283835015","https://openalex.org/W4304481207","https://openalex.org/W4315489542","https://openalex.org/W4382931108","https://openalex.org/W4384665356","https://openalex.org/W6683195989","https://openalex.org/W6747941106","https://openalex.org/W7037806652"],"related_works":["https://openalex.org/W2236801283","https://openalex.org/W2728657731","https://openalex.org/W2152670157","https://openalex.org/W2092374696","https://openalex.org/W1853631319","https://openalex.org/W2481143976","https://openalex.org/W2803932348","https://openalex.org/W2386410636","https://openalex.org/W3207342620","https://openalex.org/W2607684552"],"abstract_inverted_index":{"In":[0,20],"this":[1,59,83],"article,":[2],"we":[3,22,85,149],"investigate":[4,175],"the":[5,44,47,72,104,108,112,129,132,152,155,159,165,176,179,189,202,208,211,218,221],"Nash-seeking":[6,73],"problem":[7,74],"of":[8,11,46,58,111,131,154,164,178,213,220],"a":[9,25,64,123,186,194],"set":[10],"agents,":[12],"playing":[13],"an":[14,87,168],"infinite":[15,76],"network":[16,198],"aggregative":[17],"Markov":[18],"game.":[19,166],"particular,":[21],"focus":[23],"on":[24,43,92,136,193],"noncooperative":[26],"framework":[27,190],"where":[28],"each":[29],"agent":[30],"selfishly":[31],"aims":[32],"at":[33],"maximizing":[34],"its":[35,51],"long-term":[36],"average":[37],"reward":[38,53],"without":[39],"having":[40],"explicit":[41],"information":[42],"model":[45],"environment":[48],"dynamics":[49,219],"and":[50,107,118,121,139,210],"own":[52],"function.":[54],"The":[55],"main":[56],"contribution":[57],"article":[60],"is":[61,172,191],"to":[62,102,127,158,174],"develop":[63],"continuous":[65,116],"multiagent":[66],"reinforcement":[67],"learning":[68],"(MARL)":[69],"algorithm":[70,90],"for":[71],"in":[75],"dynamic":[77],"games":[78],"with":[79,97],"convergence":[80],"guarantee.":[81],"To":[82],"end,":[84],"propose":[86],"actor-critic":[88],"MARL":[89],"based":[91],"expected":[93],"policy":[94,110],"gradient":[95,133],"(EPG)":[96],"two":[98],"general":[99],"function":[100,106,147,183],"approximators":[101],"estimate":[103],"value":[105],"Nash":[109,161],"agents.":[113],"We":[114],"consider":[115],"state":[117],"action":[119],"spaces":[120],"adopt":[122],"newly":[124],"proposed":[125],"EPG":[126],"alleviate":[128],"variance":[130],"approximation.":[134,184],"Based":[135],"such":[137],"formulation":[138],"under":[140],"some":[141],"conventional":[142],"assumptions":[143],"(e.g.,":[144],"using":[145],"linear":[146],"approximators),":[148],"prove":[150],"that":[151],"policies":[153],"agents":[156,209],"converge":[157],"unique":[160],"equilibrium":[162],"(NE)":[163],"Furthermore,":[167],"estimation":[169],"error":[170,180],"analysis":[171],"conducted":[173],"effects":[177],"arising":[181],"from":[182],"As":[185],"case":[187],"study,":[188],"applied":[192],"cloud":[195],"radio":[196,204],"access":[197],"(C-RAN)":[199],"by":[200],"modeling":[201],"remote":[203],"heads":[205],"(RRHs)":[206],"as":[207,217],"congestion":[212],"baseband":[214],"units":[215],"(BBUs)":[216],"environment.":[222]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}