{"id":"https://openalex.org/W4382319558","doi":"https://doi.org/10.48550/arxiv.2306.13831","title":"Minigrid &amp; Miniworld: Modular &amp; Customizable Reinforcement Learning Environments for Goal-Oriented Tasks","display_name":"Minigrid &amp; Miniworld: Modular &amp; Customizable Reinforcement Learning Environments for Goal-Oriented Tasks","publication_year":2023,"publication_date":"2023-06-24","ids":{"openalex":"https://openalex.org/W4382319558","doi":"https://doi.org/10.48550/arxiv.2306.13831"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2306.13831","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.13831","pdf_url":"https://arxiv.org/pdf/2306.13831","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2306.13831","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027634678","display_name":"Maxime Chevalier-Boisvert","orcid":"https://orcid.org/0009-0007-1325-1676"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chevalier-Boisvert, Maxime","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012067336","display_name":"Bolun Dai","orcid":"https://orcid.org/0000-0002-7590-7980"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Bolun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108585735","display_name":"Mark Towers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Towers, Mark","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092346368","display_name":"Rodrigo de Lazcano","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"de Lazcano, Rodrigo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031004793","display_name":"Lucas Willems","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Willems, Lucas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030085635","display_name":"Salem Lahlou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lahlou, Salem","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101222969","display_name":"Suman Pal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pal, Suman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068291173","display_name":"Pablo Samuel Castro","orcid":"https://orcid.org/0000-0002-3206-336X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Castro, Pablo Samuel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032423586","display_name":"Jordan Terry","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Terry, Jordan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5027634678"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9276000261306763,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9230999946594238,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.7880363464355469},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.7679978609085083},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7442582845687866},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.6757367849349976},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6492027640342712},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.5361928939819336},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.494123250246048},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.488869845867157},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.43246573209762573},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.325853168964386},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2520207166671753},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.20595312118530273},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.11810499429702759},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10659295320510864}],"concepts":[{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.7880363464355469},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.7679978609085083},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7442582845687866},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.6757367849349976},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6492027640342712},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5361928939819336},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.494123250246048},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.488869845867157},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.43246573209762573},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.325853168964386},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2520207166671753},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.20595312118530273},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.11810499429702759},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10659295320510864},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2306.13831","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.13831","pdf_url":"https://arxiv.org/pdf/2306.13831","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2306.13831","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2306.13831","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2306.13831","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.13831","pdf_url":"https://arxiv.org/pdf/2306.13831","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4382319558.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4231704780","https://openalex.org/W2618286804","https://openalex.org/W2083794993","https://openalex.org/W352609212","https://openalex.org/W2329643025","https://openalex.org/W1511772879","https://openalex.org/W4379115841","https://openalex.org/W2002770077","https://openalex.org/W3131163342","https://openalex.org/W3088304681"],"abstract_inverted_index":{"We":[0,77],"present":[1],"the":[2,51,67,80,85,105],"Minigrid":[3,89,113],"and":[4,14,72,90,102,114],"Miniworld":[5,91,115],"libraries":[6,18],"which":[7],"provide":[8],"a":[9,23,36,43,57],"suite":[10],"of":[11,39,60,112],"goal-oriented":[12],"2D":[13],"3D":[15],"environments.":[16],"The":[17,109],"were":[19],"explicitly":[20],"created":[21],"with":[22,123],"minimalistic":[24],"design":[25,68],"paradigm":[26],"to":[27,30],"allow":[28],"users":[29],"rapidly":[31],"develop":[32],"new":[33],"environments":[34],"for":[35],"wide":[37,58],"range":[38,59],"research-specific":[40],"needs.":[41],"As":[42],"result,":[44],"both":[45,99],"have":[46],"received":[47],"widescale":[48],"adoption":[49],"by":[50,84],"RL":[52,100],"community,":[53],"facilitating":[54],"research":[55],"in":[56],"areas.":[61],"In":[62],"this":[63],"paper,":[64],"we":[65],"outline":[66],"philosophy,":[69],"environment":[70],"details,":[71],"their":[73,124],"world":[74],"generation":[75],"API.":[76],"also":[78],"showcase":[79],"additional":[81],"capabilities":[82],"brought":[83],"unified":[86],"API":[87],"between":[88,104],"through":[92],"case":[93],"studies":[94],"on":[95],"transfer":[96],"learning":[97],"(for":[98],"agents":[101],"humans)":[103],"different":[106],"observation":[107],"spaces.":[108],"source":[110],"code":[111],"can":[116],"be":[117],"found":[118],"at":[119,126],"https://github.com/Farama-Foundation/{Minigrid,":[120],"Miniworld}":[121],"along":[122],"documentation":[125],"https://{minigrid,":[127],"miniworld}.farama.org/.":[128]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2023-06-28T00:00:00"}
