{"id":"https://openalex.org/W7128787990","doi":"https://doi.org/10.48550/arxiv.2602.11437","title":"Distributionally Robust Cooperative Multi-Agent Reinforcement Learning via Robust Value Factorization","display_name":"Distributionally Robust Cooperative Multi-Agent Reinforcement Learning via Robust Value Factorization","publication_year":2026,"publication_date":"2026-02-11","ids":{"openalex":"https://openalex.org/W7128787990","doi":"https://doi.org/10.48550/arxiv.2602.11437"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.11437","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108929702","display_name":"Chengrui Qu","orcid":"https://orcid.org/0009-0002-6710-7358"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qu, Chengrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125899204","display_name":"Christopher Yeh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yeh, Christopher","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125914344","display_name":"Kishan Panaganti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panaganti, Kishan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093925471","display_name":"Eric Mazumdar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mazumdar, Eric","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120048425","display_name":"Adam Wierman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wierman, Adam","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5108929702"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9279000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9279000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.02419999986886978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.0052999998442828655,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6753000020980835},{"id":"https://openalex.org/keywords/bespoke","display_name":"Bespoke","score":0.5817000269889832},{"id":"https://openalex.org/keywords/greedy-algorithm","display_name":"Greedy algorithm","score":0.5248000025749207},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5001999735832214},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.384799987077713},{"id":"https://openalex.org/keywords/robust-optimization","display_name":"Robust optimization","score":0.3779999911785126},{"id":"https://openalex.org/keywords/robust-control","display_name":"Robust control","score":0.3431999981403351},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.3368000090122223}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7939000129699707},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6753000020980835},{"id":"https://openalex.org/C44210515","wikidata":"https://www.wikidata.org/wiki/Q16968978","display_name":"Bespoke","level":2,"score":0.5817000269889832},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5388000011444092},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.5248000025749207},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5001999735832214},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.384799987077713},{"id":"https://openalex.org/C193254401","wikidata":"https://www.wikidata.org/wiki/Q2160088","display_name":"Robust optimization","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C31531917","wikidata":"https://www.wikidata.org/wiki/Q915157","display_name":"Robust control","level":3,"score":0.3431999981403351},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3368000090122223},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32339999079704285},{"id":"https://openalex.org/C205875254","wikidata":"https://www.wikidata.org/wiki/Q17156857","display_name":"Decentralised system","level":3,"score":0.3197999894618988},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C33762810","wikidata":"https://www.wikidata.org/wiki/Q461671","display_name":"Data integrity","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2694000005722046},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.11437","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.11437","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.11437","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.11437","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Life in Land","score":0.466872900724411,"id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Cooperative":[0],"multi-agent":[1],"reinforcement":[2],"learning":[3],"(MARL)":[4],"commonly":[5],"adopts":[6],"centralized":[7],"training":[8],"with":[9,76,99,141],"decentralized":[10,22,100],"execution,":[11],"where":[12],"value-factorization":[13,124],"methods":[14,160],"enforce":[15],"the":[16,26,31,47,77,110],"individual-global-maximum":[17],"(IGM)":[18],"principle":[19,66],"so":[20],"that":[21,67,84,128],"greedy":[23,72,101],"actions":[24],"recover":[25],"team-optimal":[27,79],"joint":[28,80],"action.":[29,81],"However,":[30],"reliability":[32],"of":[33,91,122],"this":[34,57,115],"recipe":[35],"in":[36],"real-world":[37],"settings":[38],"remains":[39],"unreliable":[40],"due":[41],"to":[42,74],"environmental":[43],"uncertainties":[44],"arising":[45],"from":[46],"sim-to-real":[48],"gap,":[49],"model":[50],"mismatch,":[51],"and":[52,103,137,154,166],"system":[53],"noise.":[54],"We":[55,82],"address":[56],"gap":[58],"by":[59],"introducing":[60],"Distributionally":[61],"robust":[62,71,78,92,120,132],"IGM":[63],"(DrIGM),":[64],"a":[65,88,105,155],"requires":[68],"each":[69],"agent's":[70],"action":[73,94],"align":[75],"show":[83],"DrIGM":[85],"holds":[86],"for":[87,109],"novel":[89],"definition":[90],"individual":[93],"values,":[95],"which":[96],"is":[97],"compatible":[98],"execution":[102],"yields":[104],"provable":[106],"robustness":[107],"guarantee":[108],"whole":[111],"system.":[112],"Building":[113],"on":[114,131,150],"foundation,":[116],"we":[117],"derive":[118],"DrIGM-compliant":[119],"variants":[121],"existing":[123,142],"architectures":[125],"(e.g.,":[126],"VDN/QMIX/QTRAN)":[127],"(i)":[129],"train":[130],"Q-targets,":[133],"(ii)":[134],"preserve":[135],"scalability,":[136],"(iii)":[138],"integrate":[139],"seamlessly":[140],"codebases":[143],"without":[144],"bespoke":[145],"per-agent":[146],"reward":[147],"shaping.":[148],"Empirically,":[149],"high-fidelity":[151],"SustainGym":[152],"simulators":[153],"StarCraft":[156],"game":[157],"environment,":[158],"our":[159],"consistently":[161],"improve":[162],"out-of-distribution":[163],"performance.":[164],"Code":[165],"data":[167],"are":[168],"available":[169],"at":[170],"https://github.com/crqu/robust-coMARL.":[171]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-14T00:00:00"}
