{"id":"https://openalex.org/W4386470330","doi":"https://doi.org/10.1109/tnnls.2023.3309608","title":"NVIF: Neighboring Variational Information Flow for Cooperative Large-Scale Multiagent Reinforcement Learning","display_name":"NVIF: Neighboring Variational Information Flow for Cooperative Large-Scale Multiagent Reinforcement Learning","publication_year":2023,"publication_date":"2023-09-06","ids":{"openalex":"https://openalex.org/W4386470330","doi":"https://doi.org/10.1109/tnnls.2023.3309608","pmid":"https://pubmed.ncbi.nlm.nih.gov/37672377"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3309608","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3309608","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073383645","display_name":"Jiajun Chai","orcid":"https://orcid.org/0000-0002-7611-064X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiajun Chai","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080193690","display_name":"Yuanheng Zhu","orcid":"https://orcid.org/0000-0001-5384-423X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanheng Zhu","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100624298","display_name":"Dongbin Zhao","orcid":"https://orcid.org/0000-0001-8218-9633"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongbin Zhao","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5073383645"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210100255","https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.0876,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.89690841,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"35","issue":"12","first_page":"17829","last_page":"17841"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.965499997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.965499997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8319075107574463},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7861572504043579},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.5875740647315979},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.5711801648139954},{"id":"https://openalex.org/keywords/information-exchange","display_name":"Information exchange","score":0.5649741291999817},{"id":"https://openalex.org/keywords/information-flow","display_name":"Information flow","score":0.5588812232017517},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5079911351203918},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4993143081665039},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4575239419937134},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4501311182975769},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.43952393531799316},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.42807507514953613},{"id":"https://openalex.org/keywords/communications-protocol","display_name":"Communications protocol","score":0.4253201186656952},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4004441201686859},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3768864870071411}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8319075107574463},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7861572504043579},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.5875740647315979},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.5711801648139954},{"id":"https://openalex.org/C189693848","wikidata":"https://www.wikidata.org/wiki/Q6031064","display_name":"Information exchange","level":2,"score":0.5649741291999817},{"id":"https://openalex.org/C2779136372","wikidata":"https://www.wikidata.org/wiki/Q10283002","display_name":"Information flow","level":2,"score":0.5588812232017517},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5079911351203918},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4993143081665039},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4575239419937134},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4501311182975769},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.43952393531799316},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.42807507514953613},{"id":"https://openalex.org/C12269588","wikidata":"https://www.wikidata.org/wiki/Q132364","display_name":"Communications protocol","level":2,"score":0.4253201186656952},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4004441201686859},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3768864870071411},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C142724271","wikidata":"https://www.wikidata.org/wiki/Q7208","display_name":"Pathology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C204787440","wikidata":"https://www.wikidata.org/wiki/Q188504","display_name":"Alternative medicine","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3309608","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3309608","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:37672377","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37672377","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","score":0.49000000953674316,"display_name":"Partnerships for the goals"}],"awards":[{"id":"https://openalex.org/G1594336315","display_name":null,"funder_award_id":"2018AAA0102404","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G800914696","display_name":null,"funder_award_id":"62136008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8179479325","display_name":null,"funder_award_id":"62293541","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W2012812921","https://openalex.org/W2167183308","https://openalex.org/W2480536132","https://openalex.org/W2538000344","https://openalex.org/W2877133098","https://openalex.org/W2911573951","https://openalex.org/W2911917164","https://openalex.org/W2962764167","https://openalex.org/W2963890729","https://openalex.org/W2969732751","https://openalex.org/W2994707981","https://openalex.org/W2996525917","https://openalex.org/W2997070234","https://openalex.org/W2998367975","https://openalex.org/W3084241738","https://openalex.org/W3099689767","https://openalex.org/W3110979110","https://openalex.org/W3153042747","https://openalex.org/W3192813613","https://openalex.org/W3196869853","https://openalex.org/W3210169707","https://openalex.org/W4312283068","https://openalex.org/W4313293015","https://openalex.org/W4321021823","https://openalex.org/W4375929017","https://openalex.org/W6627932998","https://openalex.org/W6640212811","https://openalex.org/W6712181171","https://openalex.org/W6713411898","https://openalex.org/W6726873649","https://openalex.org/W6741002519","https://openalex.org/W6747941106","https://openalex.org/W6749032143","https://openalex.org/W6749304979","https://openalex.org/W6751139674","https://openalex.org/W6755069753","https://openalex.org/W6757096465","https://openalex.org/W6757781149","https://openalex.org/W6757784512","https://openalex.org/W6768539364","https://openalex.org/W6775715841","https://openalex.org/W6779753894","https://openalex.org/W6790415032","https://openalex.org/W6796861069","https://openalex.org/W6803290710","https://openalex.org/W6840380725"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2928233854","https://openalex.org/W65846195","https://openalex.org/W2325440561","https://openalex.org/W2080321523","https://openalex.org/W2263935216"],"abstract_inverted_index":{"Communication-based":[0],"multiagent":[1,26],"reinforcement":[2],"learning":[3],"(MARL)":[4],"has":[5,157],"shown":[6],"promising":[7],"results":[8,167,182],"in":[9,24,174],"promoting":[10],"cooperation":[11,162],"by":[12,43,68],"enabling":[13],"agents":[14,67],"to":[15,29,36,121,135,160],"exchange":[16],"information.":[17],"However,":[18],"the":[19,38,44,72,81,86,99,112,132,158,169,185],"existing":[20,82],"methods":[21],"have":[22],"limitations":[23],"large-scale":[25],"systems":[27],"due":[28],"high":[30],"information":[31,59,74,79],"redundancy,":[32],"and":[33,125,129,177],"they":[34],"tend":[35],"overlook":[37],"unstable":[39],"training":[40,101,107,139],"process":[41],"caused":[42],"online-trained":[45],"communication":[46,64,127],"protocol.":[47],"In":[48],"this":[49],"work,":[50],"we":[51,103],"propose":[52],"a":[53,89,105,116,123],"novel":[54],"method":[55,173,189],"called":[56],"neighboring":[57,66,95],"variational":[58],"flow":[60],"(NVIF),":[61],"which":[62,152],"enhances":[63],"among":[65],"providing":[69],"them":[70],"with":[71,140,155,163],"maximum":[73],"set":[75],"(MIS)":[76],"containing":[77],"more":[78],"than":[80],"methods.":[83],"NVIF":[84,113,154],"compresses":[85],"MIS":[87],"into":[88],"compact":[90],"latent":[91],"state":[92],"while":[93],"adopting":[94],"communication.":[96],"To":[97],"stabilize":[98],"overall":[100],"process,":[102],"introduce":[104],"two-stage":[106],"mechanism.":[108],"We":[109],"first":[110],"pretrain":[111],"module":[114],"using":[115],"randomly":[117],"sampled":[118],"offline":[119],"dataset":[120],"create":[122],"task-agnostic":[124],"stable":[126],"protocol,":[128],"then":[130],"use":[131],"pretrained":[133],"protocol":[134],"perform":[136],"online":[137],"policy":[138,149],"RL":[141],"algorithms.":[142],"Our":[143],"theoretical":[144],"analysis":[145],"indicates":[146],"that":[147],"NVIF-proximal":[148],"optimization":[150],"(PPO),":[151],"combines":[153],"PPO,":[156],"potential":[159,186],"promote":[161],"agent-specific":[164],"rewards.":[165],"Experiment":[166],"demonstrate":[168,184],"superiority":[170],"of":[171,187],"our":[172,188],"both":[175],"heterogeneous":[176],"homogeneous":[178],"settings.":[179],"Additional":[180],"experiment":[181],"also":[183],"for":[190],"multitask":[191],"learning.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
