{"id":"https://openalex.org/W4417201696","doi":"https://doi.org/10.1109/tcyb.2025.3625137","title":"Signaling-Driven Incentive Communication for Enhanced Multiagent Reinforcement Learning in Dynamic Environments","display_name":"Signaling-Driven Incentive Communication for Enhanced Multiagent Reinforcement Learning in Dynamic Environments","publication_year":2025,"publication_date":"2025-12-10","ids":{"openalex":"https://openalex.org/W4417201696","doi":"https://doi.org/10.1109/tcyb.2025.3625137","pmid":"https://pubmed.ncbi.nlm.nih.gov/41370144"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2025.3625137","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2025.3625137","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056395476","display_name":"Kexing Peng","orcid":"https://orcid.org/0000-0002-2712-9088"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kexing Peng","raw_affiliation_strings":["School of Computer Science, Nanjing University of Information Science and Technology, Nanjing, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Nanjing University of Information Science and Technology, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076957254","display_name":"Pengyi Li","orcid":"https://orcid.org/0009-0009-8546-2346"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengyi Li","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047509839","display_name":"Jianye Hao","orcid":"https://orcid.org/0000-0002-0422-8235"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianye Hao","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5056395476"],"corresponding_institution_ids":["https://openalex.org/I200845125"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20644168,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"56","issue":"2","first_page":"725","last_page":"738"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6582000255584717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6582000255584717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.14710000157356262,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.026799999177455902,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7328000068664551},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6215000152587891},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.511900007724762},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.508400022983551},{"id":"https://openalex.org/keywords/incentive","display_name":"Incentive","score":0.44519999623298645},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.4348999857902527},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.37299999594688416},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.35120001435279846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7559000253677368},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7328000068664551},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6215000152587891},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5570999979972839},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.511900007724762},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.508400022983551},{"id":"https://openalex.org/C29122968","wikidata":"https://www.wikidata.org/wiki/Q1414816","display_name":"Incentive","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.4348999857902527},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.37299999594688416},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.35120001435279846},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.30880001187324524},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2808000147342682},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2784000039100647},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.25459998846054077},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2524000108242035},{"id":"https://openalex.org/C158156997","wikidata":"https://www.wikidata.org/wiki/Q1416645","display_name":"Models of communication","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2025.3625137","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2025.3625137","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:41370144","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41370144","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G325098684","display_name":null,"funder_award_id":"92370132","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4707140326","display_name":null,"funder_award_id":"624B2101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8875152133","display_name":null,"funder_award_id":"62422605","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2903709398","https://openalex.org/W2982316857","https://openalex.org/W2998367975","https://openalex.org/W3092206109","https://openalex.org/W3156295478","https://openalex.org/W3173215816","https://openalex.org/W3173782550","https://openalex.org/W4283789768","https://openalex.org/W4390342279","https://openalex.org/W4390753602","https://openalex.org/W4391669357","https://openalex.org/W4391919254","https://openalex.org/W4393153207","https://openalex.org/W4393161601","https://openalex.org/W4393241349","https://openalex.org/W4399346003","https://openalex.org/W4399533021","https://openalex.org/W4399772534","https://openalex.org/W4401209885","https://openalex.org/W4402194461","https://openalex.org/W4402401893","https://openalex.org/W4402809058","https://openalex.org/W4403331605","https://openalex.org/W4405441213","https://openalex.org/W7124239827","https://openalex.org/W7124242066"],"related_works":[],"abstract_inverted_index":{"Centralized":[0],"training":[1],"and":[2,15,38,81,104,120,129,141,151,170],"decentralized":[3],"execution":[4],"(CTDE)":[5],"frameworks":[6],"in":[7,17,138,167],"cooperative":[8],"multiagent":[9],"reinforcement":[10],"learning":[11],"(MARL)":[12],"address":[13],"nonstationarity":[14],"scalability":[16],"dynamic":[18],"environments.":[19],"However,":[20],"coordination":[21],"among":[22],"agents":[23,113],"remains":[24],"challenging":[25],"due":[26],"to":[27,32,55,77,114],"limited":[28],"observability,":[29],"often":[30],"leading":[31],"inefficient":[33],"exploration":[34],"of":[35,118,164],"policy":[36,102,173],"spaces":[37],"increased":[39],"communication":[40,43,63,127,152],"overhead.":[41],"Existing":[42],"mechanisms":[44],"partially":[45],"alleviate":[46],"these":[47],"issues":[48],"but":[49],"typically":[50],"add":[51],"complexity":[52],"without":[53],"adapting":[54],"changing":[56],"conditions.":[57],"We":[58],"propose":[59],"the":[60,116,161],"signaling-driven":[61],"incentive":[62],"(SDIC)":[64],"framework,":[65],"a":[66],"novel":[67],"approach":[68],"that":[69,100],"integrates":[70],"Markov":[71],"signaling":[72],"games":[73],"(MSGs)":[74],"into":[75],"CTDE":[76],"enable":[78],"more":[79],"efficient":[80],"targeted":[82],"interagent":[83],"communication.":[84],"By":[85],"integrating":[86],"value-based":[87],"methods":[88],"with":[89,154],"sparse":[90],"communication,":[91],"SDIC":[92,108],"reduces":[93],"unnecessary":[94],"exchanges":[95],"while":[96],"generating":[97],"tailored":[98],"signals":[99],"enhance":[101],"alignment":[103],"improve":[105],"coordination.":[106],"Furthermore,":[107],"incorporates":[109],"partner":[110],"modeling,":[111],"allowing":[112],"anticipate":[115],"behavior":[117],"others":[119],"thus":[121],"strike":[122],"an":[123],"effective":[124,172],"balance":[125],"between":[126],"efficiency":[128,153],"computational":[130,156],"complexity.":[131,157],"Our":[132],"experimental":[133],"results,":[134],"including":[135],"extensive":[136],"evaluations":[137],"StarCraft":[139],"II":[140],"SUMO":[142],"traffic":[143],"simulations,":[144],"demonstrate":[145],"SDIC's":[146,165],"superior":[147],"coordination,":[148],"task":[149],"success,":[150],"manageable":[155],"Ablation":[158],"studies":[159],"validate":[160],"critical":[162],"roles":[163],"components":[166],"reducing":[168],"overhead":[169],"ensuring":[171],"alignment.":[174]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-12-10T00:00:00"}
