{"id":"https://openalex.org/W7133336740","doi":"https://doi.org/10.48550/arxiv.2603.00730","title":"MO-MIX: Multi-Objective Multi-Agent Cooperative Decision-Making With Deep Reinforcement Learning","display_name":"MO-MIX: Multi-Objective Multi-Agent Cooperative Decision-Making With Deep Reinforcement Learning","publication_year":2026,"publication_date":"2026-02-28","ids":{"openalex":"https://openalex.org/W7133336740","doi":"https://doi.org/10.48550/arxiv.2603.00730"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00730","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00730","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00730","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127970837","display_name":"Tianmeng Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hu, Tianmeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127997885","display_name":"Biao Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Biao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127936400","display_name":"Chunhua Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Chunhua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127894512","display_name":"Tingwen Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Tingwen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5127970837"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.46320000290870667,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.46320000290870667,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.3693999946117401,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.012900000438094139,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8077999949455261},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.43639999628067017},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.40380001068115234},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.3716999888420105},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.35519999265670776},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.32420000433921814},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.30790001153945923},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.3028999865055084},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.2992999851703644}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8077999949455261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7166000008583069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5878000259399414},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.43639999628067017},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42800000309944153},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.40380001068115234},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.373199999332428},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.3716999888420105},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.35519999265670776},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.2992999851703644},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.2903999984264374},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29030001163482666},{"id":"https://openalex.org/C2986314615","wikidata":"https://www.wikidata.org/wiki/Q36829","display_name":"Pareto optimal","level":3,"score":0.2896000146865845},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C193415008","wikidata":"https://www.wikidata.org/wiki/Q639681","display_name":"Network architecture","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.2619999945163727},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00730","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00730","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00730","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00730","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7879295945167542,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,82],"learning":[2,83],"(RL)":[3],"has":[4],"been":[5,42],"applied":[6,144],"extensively":[7],"to":[8,28,51,77,131,145],"solve":[9,78,162],"complex":[10],"decision-making":[11,35,59,66,167],"problems.":[12,36],"In":[13,71,137],"many":[14],"real-world":[15],"scenarios,":[16],"tasks":[17],"often":[18],"have":[19,41],"several":[20],"conflicting":[21],"objectives":[22,106],"and":[23,54,169],"may":[24],"require":[25],"multiple":[26],"agents":[27],"cooperate,":[29],"which":[30],"are":[31,49],"the":[32,79,91,105,110,133,147,150,157,163,174,183],"multi-objective":[33,65,80,164],"multi-agent":[34,58,81,165],"However,":[37],"only":[38,56,180],"few":[39],"works":[40],"conducted":[43],"on":[44,90],"this":[45,72],"intersection.":[46],"Existing":[47],"approaches":[48],"limited":[50],"separate":[52],"fields":[53],"can":[55,160],"handle":[57],"with":[60,67,94,126],"a":[61,68,115,123],"single":[62,69],"objective,":[63],"or":[64],"agent.":[70],"paper,":[73],"we":[74],"propose":[75],"MO-MIX":[76],"(MOMARL)":[84],"problem.":[85],"Our":[86,177],"approach":[87,142,178],"is":[88,107,129,143],"based":[89],"centralized":[92],"training":[93],"decentralized":[95,111],"execution":[96],"(CTDE)":[97],"framework.":[98],"A":[99],"weight":[100],"vector":[101],"representing":[102],"preference":[103],"over":[104],"fed":[108],"into":[109],"agent":[112],"network":[113,125],"as":[114],"condition":[116],"for":[117],"local":[118],"action-value":[119,135],"function":[120],"estimation,":[121],"while":[122],"mixing":[124],"parallel":[127],"architecture":[128],"used":[130],"estimate":[132],"joint":[134],"function.":[136],"addition,":[138],"an":[139,171],"exploration":[140],"guide":[141],"improve":[146],"uniformity":[148],"of":[149,173,190],"final":[151],"non-dominated":[152],"solutions.":[153],"Experiments":[154],"demonstrate":[155],"that":[156],"proposed":[158],"method":[159,185],"effectively":[161],"cooperative":[166],"problem":[168],"generate":[170],"approximation":[172],"Pareto":[175],"set.":[176],"not":[179],"significantly":[181],"outperforms":[182],"baseline":[184],"in":[186],"all":[187],"four":[188],"kinds":[189],"evaluation":[191],"metrics,":[192],"but":[193],"also":[194],"requires":[195],"less":[196],"computational":[197],"cost.":[198]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
