{"id":"https://openalex.org/W7127666755","doi":"https://doi.org/10.48550/arxiv.2602.02970","title":"Co2PO: Coordinated Constrained Policy Optimization for Multi-Agent RL","display_name":"Co2PO: Coordinated Constrained Policy Optimization for Multi-Agent RL","publication_year":2026,"publication_date":"2026-02-03","ids":{"openalex":"https://openalex.org/W7127666755","doi":"https://doi.org/10.48550/arxiv.2602.02970"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.02970","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125039343","display_name":"Shrenik Patel","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Patel, Shrenik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124977642","display_name":"Christine Truong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Truong, Christine","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5125039343"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.751800000667572,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.751800000667572,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.0640999972820282,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.02290000021457672,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.677299976348877},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.675599992275238},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4047999978065491},{"id":"https://openalex.org/keywords/broadcasting","display_name":"Broadcasting (networking)","score":0.3783999979496002},{"id":"https://openalex.org/keywords/hazard","display_name":"Hazard","score":0.37709999084472656},{"id":"https://openalex.org/keywords/blackboard","display_name":"Blackboard (design pattern)","score":0.37299999594688416},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.35690000653266907},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.335099995136261}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7346000075340271},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.677299976348877},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.675599992275238},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4047999978065491},{"id":"https://openalex.org/C110157686","wikidata":"https://www.wikidata.org/wiki/Q922122","display_name":"Broadcasting (networking)","level":2,"score":0.3783999979496002},{"id":"https://openalex.org/C49261128","wikidata":"https://www.wikidata.org/wiki/Q1132455","display_name":"Hazard","level":2,"score":0.37709999084472656},{"id":"https://openalex.org/C2778308757","wikidata":"https://www.wikidata.org/wiki/Q23601418","display_name":"Blackboard (design pattern)","level":2,"score":0.37299999594688416},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.35690000653266907},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33739998936653137},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.335099995136261},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.32339999079704285},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3142000138759613},{"id":"https://openalex.org/C2989514635","wikidata":"https://www.wikidata.org/wiki/Q5164377","display_name":"Constrained optimization problem","level":3,"score":0.3061999976634979},{"id":"https://openalex.org/C134121241","wikidata":"https://www.wikidata.org/wiki/Q899301","display_name":"Yield (engineering)","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.296999990940094},{"id":"https://openalex.org/C2776544517","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Unexpected events","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2574000060558319},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2524000108242035},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.25040000677108765}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.02970","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.02970","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.02970","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.02970","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.6854066848754883,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Constrained":[0],"multi-agent":[1,123],"reinforcement":[2],"learning":[3],"(MARL)":[4],"faces":[5],"a":[6,46,61,74,93,119],"fundamental":[7],"tension":[8],"between":[9],"exploration":[10,38],"and":[11,39,69,102,154],"safety-constrained":[12],"optimization.":[13],"Existing":[14],"leading":[15,40,133],"approaches,":[16],"such":[17],"as":[18],"Lagrangian":[19],"methods,":[20],"typically":[21],"rely":[22],"on":[23],"global":[24],"penalties":[25],"or":[26],"centralized":[27],"critics":[28],"that":[29,51,78],"react":[30],"to":[31,41,100,132,138],"violations":[32,82],"after":[33],"they":[34],"occur,":[35],"often":[36],"suppressing":[37],"over-conservatism.":[42],"We":[43,115],"propose":[44],"Co2PO,":[45],"novel":[47],"MARL":[48],"communication-augmented":[49],"framework":[50],"enables":[52],"coordination-driven":[53],"safety":[54,124],"through":[55],"selective,":[56],"risk-aware":[57],"communication.":[58],"Co2PO":[59,97,117],"introduces":[60],"shared":[62,155],"blackboard":[63],"architecture":[64],"for":[65],"broadcasting":[66],"positional":[67],"intent":[68],"yield":[70],"signals,":[71],"governed":[72],"by":[73],"learned":[75],"hazard":[76],"predictor":[77],"proactively":[79],"forecasts":[80,91],"potential":[81],"over":[83],"an":[84],"extended":[85],"temporal":[86],"horizon.":[87],"By":[88],"integrating":[89],"these":[90],"into":[92],"constrained":[94,134],"optimization":[95],"objective,":[96],"allows":[98],"agents":[99],"anticipate":[101],"navigate":[103],"collective":[104],"hazards":[105],"without":[106],"the":[107,147],"performance":[108],"trade-offs":[109],"inherent":[110],"in":[111],"traditional":[112],"reactive":[113],"constraints.":[114],"evaluate":[116],"across":[118],"suite":[120],"of":[121,149],"complex":[122],"benchmarks,":[125],"where":[126],"it":[127],"achieves":[128],"higher":[129],"returns":[130],"compared":[131],"baselines":[135],"while":[136],"converging":[137],"cost-compliant":[139],"policies":[140],"at":[141],"deployment.":[142],"Ablation":[143],"studies":[144],"further":[145],"validate":[146],"necessity":[148],"risk-triggered":[150],"communication,":[151],"adaptive":[152],"gating,":[153],"memory":[156],"components.":[157]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-06T00:00:00"}
