{"id":"https://openalex.org/W4385338914","doi":"https://doi.org/10.1109/tits.2023.3296769","title":"Multi-Agent Reinforcement Learning With Policy Clipping and Average Evaluation for UAV-Assisted Communication Markov Game","display_name":"Multi-Agent Reinforcement Learning With Policy Clipping and Average Evaluation for UAV-Assisted Communication Markov Game","publication_year":2023,"publication_date":"2023-07-28","ids":{"openalex":"https://openalex.org/W4385338914","doi":"https://doi.org/10.1109/tits.2023.3296769"},"language":"en","primary_location":{"id":"doi:10.1109/tits.2023.3296769","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tits.2023.3296769","pdf_url":null,"source":{"id":"https://openalex.org/S144771191","display_name":"IEEE Transactions on Intelligent Transportation Systems","issn_l":"1524-9050","issn":["1524-9050","1558-0016"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Transportation Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007390986","display_name":"Zikai Feng","orcid":"https://orcid.org/0000-0001-8788-3659"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]},{"id":"https://openalex.org/I20942203","display_name":"Hainan University","ror":"https://ror.org/03q648j11","country_code":"CN","type":"education","lineage":["https://openalex.org/I20942203"]}],"countries":["CN","SG"],"is_corresponding":true,"raw_author_name":"Zikai Feng","raw_affiliation_strings":["School of Information and Communication Engineering, Hainan University, Haikou, China","State Key Laboratory of Marine Resource Utilization in South China Sea, Haikou, China","Engineering Product Development Pillar, Singapore University of Technology and Design, Tampines, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Hainan University, Haikou, China","institution_ids":["https://openalex.org/I20942203"]},{"raw_affiliation_string":"State Key Laboratory of Marine Resource Utilization in South China Sea, Haikou, China","institution_ids":[]},{"raw_affiliation_string":"Engineering Product Development Pillar, Singapore University of Technology and Design, Tampines, Singapore","institution_ids":["https://openalex.org/I152815399"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036528551","display_name":"Mengxing Huang","orcid":"https://orcid.org/0000-0001-8848-2566"},"institutions":[{"id":"https://openalex.org/I20942203","display_name":"Hainan University","ror":"https://ror.org/03q648j11","country_code":"CN","type":"education","lineage":["https://openalex.org/I20942203"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengxing Huang","raw_affiliation_strings":["School of Information and Communication Engineering, Hainan University, Haikou, China","State Key Laboratory of Marine Resource Utilization in South China Sea, Haikou, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Hainan University, Haikou, China","institution_ids":["https://openalex.org/I20942203"]},{"raw_affiliation_string":"State Key Laboratory of Marine Resource Utilization in South China Sea, Haikou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101557520","display_name":"Di Wu","orcid":"https://orcid.org/0000-0003-1865-9885"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I20942203","display_name":"Hainan University","ror":"https://ror.org/03q648j11","country_code":"CN","type":"education","lineage":["https://openalex.org/I20942203"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Wu","raw_affiliation_strings":["School of Information and Communication Engineering, Hainan University, Haikou, China","Department of Automation, Shanghai Jiao Tong University, Shanghai, China","State Key Laboratory of Marine Resource Utilization in South China Sea, Haikou, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Hainan University, Haikou, China","institution_ids":["https://openalex.org/I20942203"]},{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"State Key Laboratory of Marine Resource Utilization in South China Sea, Haikou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110956301","display_name":"Edmond Q. Wu","orcid":"https://orcid.org/0000-0003-1301-9870"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Edmond Q. Wu","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060020877","display_name":"Chau Yuen","orcid":"https://orcid.org/0000-0002-9307-2120"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Chau Yuen","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Jurong West, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Jurong West, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5007390986"],"corresponding_institution_ids":["https://openalex.org/I152815399","https://openalex.org/I20942203"],"apc_list":null,"apc_paid":null,"fwci":30.9742,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.99635739,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"24","issue":"12","first_page":"14281","last_page":"14293"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10148","display_name":"Advanced MIMO Systems Optimization","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8857611417770386},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7823699712753296},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7441869378089905},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6850634813308716},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6271489858627319},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5116653442382812},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5025515556335449},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.4924705922603607},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.45580458641052246},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.4547639489173889},{"id":"https://openalex.org/keywords/base-station","display_name":"Base station","score":0.44265180826187134},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.41904395818710327},{"id":"https://openalex.org/keywords/clipping","display_name":"Clipping (morphology)","score":0.4130405783653259},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.34126943349838257},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2756388187408447},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2472403347492218},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.10686066746711731},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08980569243431091}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8857611417770386},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7823699712753296},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7441869378089905},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6850634813308716},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6271489858627319},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5116653442382812},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5025515556335449},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.4924705922603607},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.45580458641052246},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.4547639489173889},{"id":"https://openalex.org/C68649174","wikidata":"https://www.wikidata.org/wiki/Q1379116","display_name":"Base station","level":2,"score":0.44265180826187134},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.41904395818710327},{"id":"https://openalex.org/C2776848632","wikidata":"https://www.wikidata.org/wiki/Q853463","display_name":"Clipping (morphology)","level":2,"score":0.4130405783653259},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.34126943349838257},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2756388187408447},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2472403347492218},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.10686066746711731},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08980569243431091},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tits.2023.3296769","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tits.2023.3296769","pdf_url":null,"source":{"id":"https://openalex.org/S144771191","display_name":"IEEE Transactions on Intelligent Transportation Systems","issn_l":"1524-9050","issn":["1524-9050","1558-0016"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Transportation Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8100000023841858,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1827367259","display_name":null,"funder_award_id":"622RC618","funder_id":"https://openalex.org/F4320317783","funder_display_name":"Hainan Provincial Postdoctoral Science Foundation"},{"id":"https://openalex.org/G1931484539","display_name":null,"funder_award_id":"62062030","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6721094537","display_name":null,"funder_award_id":"KYQD(ZR)-21007","funder_id":"https://openalex.org/F4320323370","funder_display_name":"Hainan University"},{"id":"https://openalex.org/G7007783471","display_name":null,"funder_award_id":"202207565036","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G8738756575","display_name":null,"funder_award_id":"621QN212","funder_id":"https://openalex.org/F4320322866","funder_display_name":"Natural Science Foundation of Hainan Province"}],"funders":[{"id":"https://openalex.org/F4320317783","display_name":"Hainan Provincial Postdoctoral Science Foundation","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320322866","display_name":"Natural Science Foundation of Hainan Province","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323370","display_name":"Hainan University","ror":"https://ror.org/03q648j11"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1522301498","https://openalex.org/W1542941925","https://openalex.org/W1771410628","https://openalex.org/W1972221945","https://openalex.org/W1988456768","https://openalex.org/W2118521282","https://openalex.org/W2119717200","https://openalex.org/W2145339207","https://openalex.org/W2523219060","https://openalex.org/W2736601468","https://openalex.org/W2780441808","https://openalex.org/W2806907026","https://openalex.org/W2889656324","https://openalex.org/W2936176742","https://openalex.org/W2963061782","https://openalex.org/W2968782695","https://openalex.org/W2980965333","https://openalex.org/W2982316857","https://openalex.org/W2993490502","https://openalex.org/W2996037775","https://openalex.org/W3016872443","https://openalex.org/W3019290518","https://openalex.org/W3026956345","https://openalex.org/W3033100261","https://openalex.org/W3119069103","https://openalex.org/W3122751454","https://openalex.org/W3123055967","https://openalex.org/W3124451384","https://openalex.org/W3128011616","https://openalex.org/W3164175916","https://openalex.org/W3188205635","https://openalex.org/W3198627383","https://openalex.org/W4205544270","https://openalex.org/W4213243596","https://openalex.org/W4214624681","https://openalex.org/W4224228169","https://openalex.org/W4226016097","https://openalex.org/W4234761190","https://openalex.org/W4299802797","https://openalex.org/W4318586126","https://openalex.org/W6631190155","https://openalex.org/W6638018090","https://openalex.org/W6684921986","https://openalex.org/W6738796088","https://openalex.org/W6741002519","https://openalex.org/W6771521634","https://openalex.org/W6810227660"],"related_works":["https://openalex.org/W2808418668","https://openalex.org/W2357975469","https://openalex.org/W2101748387","https://openalex.org/W3096874164","https://openalex.org/W4281812492","https://openalex.org/W3105579180","https://openalex.org/W2970347269","https://openalex.org/W3167472281","https://openalex.org/W4400868993","https://openalex.org/W2146763310"],"abstract_inverted_index":{"Unmanned":[0],"aerial":[1,136],"vehicle":[2],"(UAV)-assisted":[3],"communication":[4,166],"is":[5,30,44,98],"a":[6,33],"significant":[7],"technology":[8],"in":[9],"6G":[10],"communication.":[11],"In":[12,151],"order":[13],"to":[14,46,53,62,106,138,144],"cope":[15],"with":[16,100],"the":[17,23,26,38,55,76,108,123,128,135,140,153,159,164],"dynamic":[18],"trajectory":[19],"optimization":[20,96],"problem":[21],"of":[22,58,156],"air-ground":[24],"network,":[25],"interaction":[27],"between":[28],"entities":[29],"modeled":[31],"as":[32,61,81],"Markov":[34,167],"game":[35],"firstly.":[36],"Then,":[37],"model-free":[39],"multi-agent":[40,93],"reinforcement":[41],"learning":[42],"(MARL)":[43],"adopted":[45],"optimize":[47,63],"individual":[48],"decision-making.":[49],"This":[50],"enables":[51],"agents":[52,157],"learn":[54],"mobile":[56,66,142],"patterns":[57],"others,":[59],"so":[60],"their":[64,146],"own":[65],"strategy.":[67],"However,":[68],"there":[69],"are":[70],"some":[71],"common":[72],"issues":[73],"when":[74],"executing":[75],"benchmark":[77,124],"MARL":[78],"algorithms,":[79],"such":[80],"biased":[82],"estimation":[83],"and":[84,103,111,134],"local":[85],"optimum.":[86],"To":[87],"solve":[88],"these":[89],"problems,":[90],"an":[91],"enhanced":[92],"proximal":[94],"policy":[95,101],"algorithm":[97],"proposed":[99],"clipping":[102],"average":[104],"evaluation":[105],"guarantee":[107],"fast":[109],"convergence":[110,121],"accurate":[112],"estimation.":[113],"Simulations":[114],"demonstrate":[115],"that":[116],"this":[117],"method":[118],"produces":[119],"superior":[120],"than":[122],"algorithms.":[125],"It":[126],"allows":[127],"UAV":[129],"base":[130],"station,":[131],"ground":[132],"users":[133],"jammer":[137],"adopt":[139],"optimal":[141],"strategies":[143,155],"achieve":[145],"respective":[147],"maximum":[148],"cumulative":[149],"rewards.":[150],"addition,":[152],"stable":[154],"constitute":[158],"approximate":[160],"Nash":[161],"equilibrium":[162],"for":[163],"UAV-assisted":[165],"Game.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":2}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
