{"id":"https://openalex.org/W4323572072","doi":"https://doi.org/10.48550/arxiv.2303.03376","title":"MAESTRO: Open-Ended Environment Design for Multi-Agent Reinforcement Learning","display_name":"MAESTRO: Open-Ended Environment Design for Multi-Agent Reinforcement Learning","publication_year":2023,"publication_date":"2023-03-06","ids":{"openalex":"https://openalex.org/W4323572072","doi":"https://doi.org/10.48550/arxiv.2303.03376"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2303.03376","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.03376","pdf_url":"https://arxiv.org/pdf/2303.03376","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2303.03376","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016880022","display_name":"Mikayel Samvelyan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Samvelyan, Mikayel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084386381","display_name":"Akbir Khan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khan, Akbir","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011988886","display_name":"Michael J. Dennis","orcid":"https://orcid.org/0000-0002-8175-5311"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dennis, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017949957","display_name":"Minqi Jiang","orcid":"https://orcid.org/0000-0003-1285-0208"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Minqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083828420","display_name":"Jack Parker-Holder","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Parker-Holder, Jack","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059094093","display_name":"Jakob Foerster","orcid":"https://orcid.org/0000-0001-9688-2498"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Foerster, Jakob","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018702533","display_name":"Roberta R\u0103ileanu","orcid":"https://orcid.org/0009-0003-2057-2903"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raileanu, Roberta","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5079315903","display_name":"Tim Rockt\u00e4schel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rockt\u00e4schel, Tim","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5016880022"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9793000221252441,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9793000221252441,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10646","display_name":"Experimental Behavioral Economics Studies","score":0.9753000140190125,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9122999906539917,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8238884210586548},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.771379828453064},{"id":"https://openalex.org/keywords/curriculum","display_name":"Curriculum","score":0.5685129165649414},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.49369844794273376},{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.49138131737709045},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.4759742319583893},{"id":"https://openalex.org/keywords/strategist","display_name":"Strategist","score":0.4460938572883606},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4298577308654785},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.42674171924591064},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.39390039443969727},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.27432650327682495},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.20487424731254578}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8238884210586548},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.771379828453064},{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.5685129165649414},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.49369844794273376},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.49138131737709045},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.4759742319583893},{"id":"https://openalex.org/C2779544567","wikidata":"https://www.wikidata.org/wiki/Q7621877","display_name":"Strategist","level":2,"score":0.4460938572883606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4298577308654785},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.42674171924591064},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39390039443969727},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27432650327682495},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.20487424731254578},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2303.03376","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.03376","pdf_url":"https://arxiv.org/pdf/2303.03376","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"pmh:oai:eprints.ucl.ac.uk.OAI2:10216733","is_oa":true,"landing_page_url":"https://discovery.ucl.ac.uk/id/eprint/10216733/","pdf_url":"https://discovery.ucl.ac.uk/10216733/1/2303.03376v1.pdf","source":{"id":"https://openalex.org/S4306400024","display_name":"UCL Discovery (University College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45129253","host_organization_name":"University College London","host_organization_lineage":["https://openalex.org/I45129253"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"In:  Proceedings of the Eleventh International Conference on Learning Representations.    : Kigali, Rwanda. (2023)","raw_type":"Proceedings paper"},{"id":"doi:10.48550/arxiv.2303.03376","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2303.03376","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2303.03376","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.03376","pdf_url":"https://arxiv.org/pdf/2303.03376","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2372286656","https://openalex.org/W2971351794","https://openalex.org/W1548227995","https://openalex.org/W4376155396","https://openalex.org/W2519077613","https://openalex.org/W2097108174","https://openalex.org/W1947085858","https://openalex.org/W2270857826","https://openalex.org/W2586485032","https://openalex.org/W2045928609"],"abstract_inverted_index":{"Open-ended":[0],"learning":[1,21],"methods":[2,24],"that":[3,133],"automatically":[4],"generate":[5],"a":[6,14,71,136],"curriculum":[7,72],"of":[8,46,138],"increasingly":[9],"challenging":[10],"tasks":[11],"serve":[12],"as":[13],"promising":[15],"avenue":[16],"toward":[17],"generally":[18],"capable":[19],"reinforcement":[20],"agents.":[22],"Existing":[23],"adapt":[25],"curricula":[26,117],"independently":[27],"over":[28,118],"either":[29],"environment":[30,66],"parameters":[31],"(in":[32,38],"single-agent":[33],"settings)":[34],"or":[35],"co-player":[36,68],"policies":[37],"multi-agent":[39,74,90,105],"settings).":[40],"However,":[41],"the":[42,62,65,103],"strengths":[43],"and":[44,67,83,121,123,147],"weaknesses":[45],"co-players":[47,122],"can":[48],"manifest":[49],"themselves":[50],"differently":[51],"depending":[52],"on":[53,141],"environmental":[54],"features.":[55],"It":[56],"is":[57],"thus":[58],"crucial":[59],"to":[60,89],"consider":[61],"dependency":[63],"between":[64],"when":[69],"shaping":[70],"in":[73],"domains.":[75],"In":[76],"this":[77,81],"work,":[78],"we":[79],"use":[80],"insight":[82],"extend":[84],"Unsupervised":[85],"Environment":[86,96],"Design":[87,97],"(UED)":[88],"environments.":[91],"We":[92],"then":[93],"introduce":[94],"Multi-Agent":[95],"Strategist":[98],"for":[99,108],"Open-Ended":[100],"Learning":[101],"(MAESTRO),":[102],"first":[104],"UED":[106],"approach":[107],"two-player":[109,143],"zero-sum":[110],"settings.":[111,150],"MAESTRO":[112,134],"efficiently":[113],"produces":[114],"adversarial,":[115],"joint":[116],"both":[119],"environments":[120],"attains":[124],"minimax-regret":[125],"guarantees":[126],"at":[127],"Nash":[128],"equilibrium.":[129],"Our":[130],"experiments":[131],"show":[132],"outperforms":[135],"number":[137],"strong":[139],"baselines":[140],"competitive":[142],"games,":[144],"spanning":[145],"discrete":[146],"continuous":[148],"control":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
