{"id":"https://openalex.org/W4402745307","doi":"https://doi.org/10.1145/3696426","title":"Synergizing Quality-Diversity with Descriptor-Conditioned Reinforcement Learning","display_name":"Synergizing Quality-Diversity with Descriptor-Conditioned Reinforcement Learning","publication_year":2024,"publication_date":"2024-09-23","ids":{"openalex":"https://openalex.org/W4402745307","doi":"https://doi.org/10.1145/3696426"},"language":"en","primary_location":{"id":"doi:10.1145/3696426","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696426","pdf_url":null,"source":{"id":"https://openalex.org/S4210221532","display_name":"ACM Transactions on Evolutionary Learning and Optimization","issn_l":"2688-299X","issn":["2688-299X","2688-3007"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Evolutionary Learning and Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3696426","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082433472","display_name":"Maxence Faldor","orcid":"https://orcid.org/0000-0003-4743-9494"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Maxence Faldor","raw_affiliation_strings":["Imperial College London, London, UK","Imperial College London, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0003-4743-9494","affiliations":[{"raw_affiliation_string":"Imperial College London, London, UK","institution_ids":["https://openalex.org/I47508984"]},{"raw_affiliation_string":"Imperial College London, United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057428630","display_name":"F\u00e9lix Chalumeau","orcid":"https://orcid.org/0000-0001-9476-2900"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"F\u00e9lix Chalumeau","raw_affiliation_strings":["InstaDeep, Cape Town, South Africa","InstaDeep, South Africa"],"raw_orcid":"https://orcid.org/0000-0001-9476-2900","affiliations":[{"raw_affiliation_string":"InstaDeep, Cape Town, South Africa","institution_ids":[]},{"raw_affiliation_string":"InstaDeep, South Africa","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060282741","display_name":"Manon Flageat","orcid":"https://orcid.org/0000-0002-4601-2176"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Manon Flageat","raw_affiliation_strings":["Imperial College London, London, UK","Imperial College London, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-4601-2176","affiliations":[{"raw_affiliation_string":"Imperial College London, London, UK","institution_ids":["https://openalex.org/I47508984"]},{"raw_affiliation_string":"Imperial College London, United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011747084","display_name":"Antoine Cully","orcid":"https://orcid.org/0000-0002-3190-7073"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Antoine Cully","raw_affiliation_strings":["Imperial College London, London, UK","Imperial College London, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-3190-7073","affiliations":[{"raw_affiliation_string":"Imperial College London, London, UK","institution_ids":["https://openalex.org/I47508984"]},{"raw_affiliation_string":"Imperial College London, United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2219,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.83039491,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"5","issue":"1","first_page":"1","last_page":"35"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.973800003528595,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10551","display_name":"Scheduling and Optimization Algorithms","score":0.9660000205039978,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6399725079536438},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6300089359283447},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.5843855738639832},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4909661114215851},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.4481661021709442},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.35374557971954346},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3147914707660675},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.16413742303848267},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.10031622648239136},{"id":"https://openalex.org/keywords/epistemology","display_name":"Epistemology","score":0.047875672578811646}],"concepts":[{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6399725079536438},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6300089359283447},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.5843855738639832},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4909661114215851},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4481661021709442},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.35374557971954346},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3147914707660675},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.16413742303848267},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.10031622648239136},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.047875672578811646},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3696426","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696426","pdf_url":null,"source":{"id":"https://openalex.org/S4210221532","display_name":"ACM Transactions on Evolutionary Learning and Optimization","issn_l":"2688-299X","issn":["2688-299X","2688-3007"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Evolutionary Learning and Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3696426","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696426","pdf_url":null,"source":{"id":"https://openalex.org/S4210221532","display_name":"ACM Transactions on Evolutionary Learning and Optimization","issn_l":"2688-299X","issn":["2688-299X","2688-3007"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Evolutionary Learning and Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1738827650","https://openalex.org/W2137983211","https://openalex.org/W2145339207","https://openalex.org/W2257979135","https://openalex.org/W2462548332","https://openalex.org/W2556477470","https://openalex.org/W2606327391","https://openalex.org/W2612690371","https://openalex.org/W2739858255","https://openalex.org/W2904246096","https://openalex.org/W2924740141","https://openalex.org/W2962687375","https://openalex.org/W2963438456","https://openalex.org/W2978280891","https://openalex.org/W2981030070","https://openalex.org/W2990138404","https://openalex.org/W3009840839","https://openalex.org/W3018036994","https://openalex.org/W3101747404","https://openalex.org/W3112009288","https://openalex.org/W3129322645","https://openalex.org/W3175917212","https://openalex.org/W4285734657","https://openalex.org/W4287757595","https://openalex.org/W4298857966","https://openalex.org/W4313889501","https://openalex.org/W4319049892","https://openalex.org/W4320167187","https://openalex.org/W4320812348","https://openalex.org/W4324106947","https://openalex.org/W4360888841","https://openalex.org/W4384008427","https://openalex.org/W4384024810","https://openalex.org/W4386523338","https://openalex.org/W4394662461","https://openalex.org/W4402157808"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"A":[0],"hallmark":[1],"of":[2,12,32,101,124,137,149,181,187],"intelligence":[3],"is":[4],"the":[5,97,114,122,125,133,138,153,169,182,188,204,207,213],"ability":[6],"to":[7,28,59,66,117,160,218],"exhibit":[8],"a":[9,30,39,157,197],"wide":[10],"range":[11],"effective":[13,120],"behaviors.":[14],"Inspired":[15],"by":[16,191],"this":[17,75,142],"principle,":[18],"Quality-Diversity":[19,102],"algorithms,":[20],"such":[21,63],"as":[22,38,64,156],"MAP-Elites,":[23,94],"are":[24,165],"evolutionary":[25],"methods":[26,78,110],"designed":[27],"generate":[29],"set":[31],"diverse":[33,162],"and":[34,81,99,135,184,211],"high-fitness":[35],"solutions.":[36],"However,":[37],"genetic":[40],"algorithm,":[41],"MAP-Elites":[42],"relies":[43],"on":[44,203],"random":[45],"mutations,":[46,121],"which":[47,86,164],"can":[48],"become":[49],"inefficient":[50],"in":[51,104,130],"high-dimensional":[52,71,106],"search":[53],"spaces,":[54],"thus":[55],"limiting":[56],"its":[57],"scalability":[58],"more":[60,119],"complex":[61],"domains,":[62],"learning":[65],"control":[67],"agents":[68],"directly":[69],"from":[70,90,216],"inputs.":[72],"To":[73],"address":[74],"limitation,":[76],"advanced":[77],"like":[79],"PGA-MAP-Elites":[80,217],"DCG-MAP-Elites":[82,150],"have":[83,111],"been":[84],"developed,":[85],"combine":[87],"actor-critic":[88],"techniques":[89],"Reinforcement":[91],"Learning":[92],"with":[93],"significantly":[95],"enhancing":[96],"performance":[98,214],"efficiency":[100],"algorithms":[103],"complex,":[105],"tasks.":[107],"While":[108],"these":[109],"successfully":[112],"leveraged":[113],"trained":[115,126],"critic":[116],"guide":[118],"potential":[123],"actor":[127,155],"remains":[128],"underutilized":[129],"improving":[131],"both":[132],"quality":[134],"diversity":[136],"evolved":[139],"population.":[140],"In":[141],"work,":[143],"we":[144,176,195],"introduce":[145],"DCRL-MAP-Elites,":[146],"an":[147,178],"extension":[148],"that":[151],"utilizes":[152],"descriptor-conditioned":[154],"generative":[158],"model":[159],"produce":[161],"solutions,":[163],"then":[166],"injected":[167],"into":[168],"offspring":[170],"batch":[171],"at":[172],"each":[173,192],"generation.":[174],"Additionally,":[175],"present":[177,196],"empirical":[179,199],"analysis":[180,200],"fitness":[183],"descriptor":[185],"reproducibility":[186],"solutions":[189],"discovered":[190],"algorithm.":[193],"Finally,":[194],"second":[198],"shedding":[201],"light":[202],"synergies":[205],"between":[206],"different":[208],"variations":[209],"operators":[210],"explaining":[212],"improvement":[215],"DCRL-MAP-Elites.":[219]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
