{"id":"https://openalex.org/W4403487106","doi":"https://doi.org/10.3233/faia240848","title":"B2MAPO: A Batch-by-Batch Multi-Agent Policy Optimization to Balance Performance and Efficiency","display_name":"B2MAPO: A Batch-by-Batch Multi-Agent Policy Optimization to Balance Performance and Efficiency","publication_year":2024,"publication_date":"2024-10-16","ids":{"openalex":"https://openalex.org/W4403487106","doi":"https://doi.org/10.3233/faia240848"},"language":"en","primary_location":{"id":"doi:10.3233/faia240848","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240848","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia240848","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wenjing Zhang","orcid":"https://orcid.org/0009-0006-2571-604X"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenjing Zhang","raw_affiliation_strings":["Harbin Institute of Technology"],"raw_orcid":"https://orcid.org/0009-0006-2571-604X","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100614543","display_name":"Wei Zhang","orcid":"https://orcid.org/0000-0002-0598-4606"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zhang","raw_affiliation_strings":["Harbin Institute of Technology"],"raw_orcid":"https://orcid.org/0000-0002-0598-4606","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106066810","display_name":"Wenqing Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenqing Hu","raw_affiliation_strings":["Harbin Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107959293","display_name":"Yifan Wang","orcid":"https://orcid.org/0009-0005-9457-7592"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Wang","raw_affiliation_strings":["Harbin Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.53149552,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.8669999837875366,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.8669999837875366,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/balance","display_name":"Balance (ability)","score":0.6305378675460815},{"id":"https://openalex.org/keywords/batch-processing","display_name":"Batch processing","score":0.4201548397541046},{"id":"https://openalex.org/keywords/process-engineering","display_name":"Process engineering","score":0.37324827909469604},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.37023186683654785},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.3208088278770447},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1399616301059723},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08397212624549866},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.04273700714111328}],"concepts":[{"id":"https://openalex.org/C168031717","wikidata":"https://www.wikidata.org/wiki/Q1530280","display_name":"Balance (ability)","level":2,"score":0.6305378675460815},{"id":"https://openalex.org/C172658912","wikidata":"https://www.wikidata.org/wiki/Q661613","display_name":"Batch processing","level":2,"score":0.4201548397541046},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.37324827909469604},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.37023186683654785},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.3208088278770447},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1399616301059723},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08397212624549866},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.04273700714111328},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia240848","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240848","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia240848","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240848","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6800000071525574,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W1602582713","https://openalex.org/W2557554316","https://openalex.org/W757360933","https://openalex.org/W2013706548"],"abstract_inverted_index":{"Most":[0],"multi-agent":[1],"reinforcement":[2],"learning":[3],"approaches":[4],"adopt":[5],"two":[6,205],"types":[7],"of":[8,22,61,87,116,168,188,266],"policy":[9,15,38,71,100,150,219,233,246],"optimization":[10],"methods":[11],"that":[12],"either":[13],"update":[14],"simultaneously":[16,235],"or":[17],"sequentially.":[18],"Simultaneously":[19],"updating":[20,30,69,91,101],"policies":[21,31,60,88,207],"all":[23,62],"agents":[24,63,189],"introduces":[25],"non-stationarity":[26],"problem.":[27],"Although":[28],"sequentially":[29,221],"agent-by-agent":[32],"in":[33,51,208],"an":[34],"appropriate":[35],"order":[36,92],"improves":[37],"performance,":[39],"it":[40],"is":[41,110,134,160,163,220,234,247],"prone":[42],"to":[43,47,65,81,136,142,180,193,224],"low":[44],"efficiency":[45],"due":[46],"sequential":[48,98],"execution,":[49,241],"resulting":[50],"longer":[52],"model":[53,281],"training":[54,282],"and":[55,68,77,89,145,152,185,210,259,283,288],"execution":[56,284],"time.":[57],"Intuitively,":[58],"partitioning":[59],"according":[64,223],"their":[66,147],"interdependence":[67,182],"joint":[70,206,218,232,245],"batch-by-batch":[72],"can":[73],"effectively":[74],"balance":[75],"performance":[76,265],"efficiency.":[78,154],"However,":[79],"how":[80],"determine":[82],"the":[83,117,243,264,280],"optimal":[84,195],"batch":[85,90,196,228],"partition":[86],"are":[93,191],"challenging":[94],"problems.":[95],"Firstly,":[96],"a":[97,113,123,156,164],"batched":[99],"scheme,":[102],"B2MAPO":[103,127,158,169,225,268],"(Batch":[104],"by":[105,237,286],"Batch":[106],"Multi-Agent":[107],"Policy":[108],"Optimization),":[109],"proposed":[111],"with":[112,177,227,275],"theoretical":[114],"guarantee":[115],"monotonic":[118],"incrementally":[119],"tightened":[120],"bound.":[121],"Secondly,":[122],"universal":[124],"modulized":[125],"plug-and-play":[126],"hierarchical":[128],"framework,":[129],"which":[130,162,190],"satisfies":[131],"CTDE":[132],"principle,":[133],"designed":[135,166],"conveniently":[137],"integrate":[138],"any":[139],"MARL":[140],"models":[141],"fully":[143],"exploit":[144],"merge":[146],"merits,":[148],"including":[149],"optimality":[151],"inference":[153],"Next,":[155],"DAG-based":[157,267],"algorithm":[159,176,269,278],"devised,":[161],"carefully":[165],"implementation":[167],"framework.":[170],"The":[171,201],"upper":[172],"layer":[173,203],"employs":[174],"PPO":[175],"attention":[178],"mechanism":[179],"reveal":[181],"between":[183,214],"policies,":[184],"generates":[186],"DAGs":[187],"used":[192],"produce":[194],"sequence":[197],"through":[198],"topological":[199],"sorting.":[200],"lower":[202],"trains":[204],"parallel":[209],"minimizes":[211],"KL":[212],"divergence":[213],"them":[215],"periodically.":[216],"One":[217],"updated":[222,236],"scheme":[226],"sequence,":[229],"another":[230],"derived":[231,244],"MAPPO.":[238],"While":[239],"decentralized":[240],"only":[242],"adopted":[248],"for":[249],"decision-making.":[250],"Comprehensive":[251],"experimental":[252],"results":[253],"conducted":[254],"on":[255],"StarCraftII":[256],"Multi-agent":[257],"Challenge":[258],"Google":[260],"Football":[261],"Research":[262],"demonstrate":[263],"outperforms":[270],"baseline":[271],"methods.":[272],"Meanwhile,":[273],"compared":[274],"A2PO,":[276],"our":[277],"reduces":[279],"time":[285],"60.4%":[287],"78.7%,":[289],"respectively.":[290]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
