{"id":"https://openalex.org/W3205875899","doi":"https://doi.org/10.1109/mrs50823.2021.9620607","title":"Local Advantage Actor-Critic for Robust Multi-Agent Deep Reinforcement Learning","display_name":"Local Advantage Actor-Critic for Robust Multi-Agent Deep Reinforcement Learning","publication_year":2021,"publication_date":"2021-11-04","ids":{"openalex":"https://openalex.org/W3205875899","doi":"https://doi.org/10.1109/mrs50823.2021.9620607","mag":"3205875899"},"language":"en","primary_location":{"id":"doi:10.1109/mrs50823.2021.9620607","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mrs50823.2021.9620607","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Symposium on Multi-Robot and Multi-Agent Systems (MRS)","raw_type":"proceedings-article"},"type":"conference-paper","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101921812","display_name":"Yuchen Xiao","orcid":"https://orcid.org/0009-0006-7438-9639"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuchen Xiao","raw_affiliation_strings":["Khoury College of Computer Sciences, Northeastern University, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Khoury College of Computer Sciences, Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079710799","display_name":"Xueguang Lyu","orcid":null},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xueguang Lyu","raw_affiliation_strings":["Khoury College of Computer Sciences, Northeastern University, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Khoury College of Computer Sciences, Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033129735","display_name":"Christopher Amato","orcid":"https://orcid.org/0000-0002-6786-7384"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Amato","raw_affiliation_strings":["Khoury College of Computer Sciences, Northeastern University, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Khoury College of Computer Sciences, Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"155","last_page":"163"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9412999749183655,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9020214080810547},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7648690938949585},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7513861656188965},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5779546499252319},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.5455644726753235},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5168622136116028},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5099891424179077},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.49335041642189026},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.45959270000457764},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.42406165599823},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15964683890342712},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.10068309307098389}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9020214080810547},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7648690938949585},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7513861656188965},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5779546499252319},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.5455644726753235},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5168622136116028},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5099891424179077},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49335041642189026},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.45959270000457764},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.42406165599823},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15964683890342712},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.10068309307098389},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mrs50823.2021.9620607","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mrs50823.2021.9620607","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Symposium on Multi-Robot and Multi-Agent Systems (MRS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15"}],"awards":[{"id":"https://openalex.org/G1884596452","display_name":null,"funder_award_id":"W911NF-20-1-0265","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G4476123902","display_name":null,"funder_award_id":"N00014-19-1-2131","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G5523557352","display_name":"Career: IIS: RI: Improving Multi-Agent Reinforcement Learning for Cooperative, Partially Observable Settings","funder_award_id":"2044993","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W1560489415","https://openalex.org/W1641379095","https://openalex.org/W2064675550","https://openalex.org/W2099618002","https://openalex.org/W2109910161","https://openalex.org/W2156737235","https://openalex.org/W2161061197","https://openalex.org/W2166533447","https://openalex.org/W2292533394","https://openalex.org/W2604873668","https://openalex.org/W2617547828","https://openalex.org/W2623431351","https://openalex.org/W2747213132","https://openalex.org/W2794643322","https://openalex.org/W2891963971","https://openalex.org/W2894976951","https://openalex.org/W2904455790","https://openalex.org/W2911616846","https://openalex.org/W2951896791","https://openalex.org/W2951984055","https://openalex.org/W2962938168","https://openalex.org/W2962966033","https://openalex.org/W2963407617","https://openalex.org/W2970272688","https://openalex.org/W2970514967","https://openalex.org/W2973525135","https://openalex.org/W2995520132","https://openalex.org/W2995874959","https://openalex.org/W3005850366","https://openalex.org/W3027501728","https://openalex.org/W3034971464","https://openalex.org/W3035802247","https://openalex.org/W3090863380","https://openalex.org/W3093287223","https://openalex.org/W3102824929","https://openalex.org/W3104860527","https://openalex.org/W3118993660","https://openalex.org/W3127052540","https://openalex.org/W3174873920","https://openalex.org/W3176265013","https://openalex.org/W3208932762","https://openalex.org/W4287330971","https://openalex.org/W4288091739","https://openalex.org/W4288594419","https://openalex.org/W4295598622","https://openalex.org/W4299802797","https://openalex.org/W6633466248","https://openalex.org/W6683195989","https://openalex.org/W6736572398","https://openalex.org/W6738796088","https://openalex.org/W6748310522","https://openalex.org/W6749304979","https://openalex.org/W6754689381","https://openalex.org/W6755069753","https://openalex.org/W6758518918","https://openalex.org/W6767151588","https://openalex.org/W6767327128","https://openalex.org/W6767919266","https://openalex.org/W6775529125","https://openalex.org/W6779438722","https://openalex.org/W6780616528","https://openalex.org/W6787618087","https://openalex.org/W6791040878","https://openalex.org/W6803458320"],"related_works":["https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698","https://openalex.org/W3184322736","https://openalex.org/W3188986767"],"abstract_inverted_index":{"Policy":[0],"gradient":[1,79,134,171],"methods":[2],"have":[3],"become":[4],"popular":[5],"in":[6,35,138],"multi-agent":[7,77,169],"reinforcement":[8],"learning,":[9],"but":[10,60],"they":[11],"suffer":[12],"from":[13],"high":[14],"variance":[15,130],"due":[16],"to":[17,64,91,128],"the":[18,33,56],"presence":[19],"of":[20,53,68,167],"environmental":[21],"stochasticity":[22],"and":[23,158,162],"exploring":[24],"agents":[25],"(i.e.,":[26],"non-stationarity),":[27],"which":[28,136],"is":[29,42,49],"potentially":[30],"worsened":[31],"by":[32],"difficulty":[34],"credit":[36,150],"assignment.":[37,151],"As":[38],"a":[39,43,46,66,75,98,108,115,126,165],"result,":[40],"there":[41],"need":[44],"for":[45],"method":[47],"that":[48,147],"not":[50],"only":[51],"capable":[52],"efficiently":[54],"solving":[55],"above":[57],"two":[58],"problems":[59],"also":[61],"robust":[62],"enough":[63],"solve":[65],"variety":[67],"tasks.":[69],"To":[70],"this":[71,120],"end,":[72],"we":[73],"propose":[74],"new":[76],"policy":[78,133,170],"method,":[80],"called":[81],"Robust":[82],"Local":[83],"Advantage":[84],"(ROLA)":[85],"Actor-Critic.":[86],"ROLA":[87,154],"allows":[88],"each":[89,123],"agent":[90,124],"learn":[92],"an":[93,139],"individual":[94],"action-value":[95,142],"function":[96],"as":[97,101,103],"local":[99,121],"critic":[100],"well":[102],"ameliorating":[104],"environment":[105],"non-stationarity":[106],"via":[107],"novel":[109],"centralized":[110,116],"training":[111],"approach":[112],"based":[113],"on":[114,131],"critic.":[117],"By":[118],"using":[119],"critic,":[122],"calculates":[125],"baseline":[127],"reduce":[129],"its":[132,160],"estimation,":[135],"results":[137],"expected":[140],"advantage":[141],"over":[143,164],"other":[144],"agents'":[145],"choices":[146],"implicitly":[148],"improves":[149],"We":[152],"evaluate":[153],"across":[155],"diverse":[156],"benchmarks":[157],"show":[159],"robustness":[161],"effectiveness":[163],"number":[166],"state-of-the-art":[168],"algorithms.":[172]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2}],"updated_date":"2026-07-14T23:27:15.235271","created_date":"2025-10-10T00:00:00"}