{"id":"https://openalex.org/W2902018790","doi":"https://doi.org/10.1142/s0219622019500093","title":"Directed Exploration in Black-Box Optimization for Multi-Objective Reinforcement Learning","display_name":"Directed Exploration in Black-Box Optimization for Multi-Objective Reinforcement Learning","publication_year":2018,"publication_date":"2018-11-28","ids":{"openalex":"https://openalex.org/W2902018790","doi":"https://doi.org/10.1142/s0219622019500093","mag":"2902018790"},"language":"en","primary_location":{"id":"doi:10.1142/s0219622019500093","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219622019500093","pdf_url":null,"source":{"id":"https://openalex.org/S207089700","display_name":"International Journal of Information Technology & Decision Making","issn_l":"0219-6220","issn":["0219-6220","1793-6845"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Information Technology &amp; Decision Making","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100645872","display_name":"Javier Garc\u00eda","orcid":"https://orcid.org/0000-0002-5638-5240"},"institutions":[{"id":"https://openalex.org/I200284239","display_name":"Universidade de Santiago de Compostela","ror":"https://ror.org/030eybx10","country_code":"ES","type":"education","lineage":["https://openalex.org/I200284239"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Javier Garc\u00eda","raw_affiliation_strings":["CiTIUS, Universidade de Santiago de Compostela, Santiago de Compostela, Spain"],"raw_orcid":"https://orcid.org/0000-0002-5638-5240","affiliations":[{"raw_affiliation_string":"CiTIUS, Universidade de Santiago de Compostela, Santiago de Compostela, Spain","institution_ids":["https://openalex.org/I200284239"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086413658","display_name":"Roberto Iglesias","orcid":"https://orcid.org/0000-0002-6279-5190"},"institutions":[{"id":"https://openalex.org/I200284239","display_name":"Universidade de Santiago de Compostela","ror":"https://ror.org/030eybx10","country_code":"ES","type":"education","lineage":["https://openalex.org/I200284239"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Roberto Iglesias","raw_affiliation_strings":["CiTIUS, Universidade de Santiago de Compostela, Santiago de Compostela, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CiTIUS, Universidade de Santiago de Compostela, Santiago de Compostela, Spain","institution_ids":["https://openalex.org/I200284239"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100678780","display_name":"Miguel A. Rodr\u00edguez","orcid":"https://orcid.org/0000-0003-4150-6561"},"institutions":[{"id":"https://openalex.org/I200284239","display_name":"Universidade de Santiago de Compostela","ror":"https://ror.org/030eybx10","country_code":"ES","type":"education","lineage":["https://openalex.org/I200284239"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Miguel A. Rodr\u00edguez","raw_affiliation_strings":["CiTIUS, Universidade de Santiago de Compostela, Santiago de Compostela, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CiTIUS, Universidade de Santiago de Compostela, Santiago de Compostela, Spain","institution_ids":["https://openalex.org/I200284239"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007784230","display_name":"Carlos V. Regueiro","orcid":"https://orcid.org/0000-0003-3672-1726"},"institutions":[{"id":"https://openalex.org/I11019714","display_name":"Universidade da Coru\u00f1a","ror":"https://ror.org/01qckj285","country_code":"ES","type":"education","lineage":["https://openalex.org/I11019714"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Carlos V. Regueiro","raw_affiliation_strings":["Department of Electronics and Systems, Universidade de Coru\u00f1a, A Coru\u00f1a, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electronics and Systems, Universidade de Coru\u00f1a, A Coru\u00f1a, Spain","institution_ids":["https://openalex.org/I11019714"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100645872"],"corresponding_institution_ids":["https://openalex.org/I200284239"],"apc_list":null,"apc_paid":null,"fwci":0.3385,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.70032595,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"18","issue":"03","first_page":"1045","last_page":"1082"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8810751438140869},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7049058079719543},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5455959439277649},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5033475756645203},{"id":"https://openalex.org/keywords/black-box","display_name":"Black box","score":0.49866557121276855},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.49256306886672974},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47545406222343445},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.47363510727882385},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.4467495083808899},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18942192196846008},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16157925128936768}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8810751438140869},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7049058079719543},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5455959439277649},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5033475756645203},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.49866557121276855},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.49256306886672974},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47545406222343445},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.47363510727882385},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.4467495083808899},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18942192196846008},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16157925128936768},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1142/s0219622019500093","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219622019500093","pdf_url":null,"source":{"id":"https://openalex.org/S207089700","display_name":"International Journal of Information Technology & Decision Making","issn_l":"0219-6220","issn":["0219-6220","1793-6845"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Information Technology &amp; Decision Making","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:wsi:ijitdm:v:18:y:2019:i:03:n:s0219622019500093","is_oa":false,"landing_page_url":"http://www.worldscientific.com/doi/abs/10.1142/S0219622019500093","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W174941419","https://openalex.org/W755046805","https://openalex.org/W1480799349","https://openalex.org/W1497976081","https://openalex.org/W1543572792","https://openalex.org/W1762172548","https://openalex.org/W1977655452","https://openalex.org/W1998649829","https://openalex.org/W1999371911","https://openalex.org/W2012392077","https://openalex.org/W2012612381","https://openalex.org/W2016765487","https://openalex.org/W2045276965","https://openalex.org/W2053900989","https://openalex.org/W2058192020","https://openalex.org/W2097381042","https://openalex.org/W2098907614","https://openalex.org/W2100830227","https://openalex.org/W2102660061","https://openalex.org/W2115167570","https://openalex.org/W2117626647","https://openalex.org/W2119717200","https://openalex.org/W2127107099","https://openalex.org/W2139053308","https://openalex.org/W2142916680","https://openalex.org/W2160088187","https://openalex.org/W2174817438","https://openalex.org/W2186629830","https://openalex.org/W2245192865","https://openalex.org/W2475712273","https://openalex.org/W2475886745","https://openalex.org/W3103262232","https://openalex.org/W4214717370"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W1630076647","https://openalex.org/W20361778","https://openalex.org/W2024136090"],"abstract_inverted_index":{"Usually,":[0],"real-world":[1],"problems":[2,12,138],"involve":[3],"the":[4,32,49,62,72,89,99,106,121,124,141,155,162,168,180,186,192,207,211],"optimization":[5],"of":[6,26,51,64,91,94,123,154,164,170,179],"multiple,":[7],"possibly":[8],"conflicting,":[9],"objectives.":[10],"These":[11],"may":[13],"be":[14,68],"addressed":[15],"by":[16],"Multi-objective":[17],"Reinforcement":[18,28],"learning":[19,52,107,125],"(MORL)":[20],"techniques.":[21],"MORL":[22,47,135,201],"is":[23,36,48,194],"a":[24,92,117,132,176,222],"generalization":[25],"standard":[27],"Learning":[29],"(RL)":[30],"where":[31],"single":[33],"reward":[34],"signal":[35],"extended":[37],"to":[38,70,74,96,173,188,198],"multiple":[39,56],"signals,":[40],"in":[41,103,139],"particular,":[42],"one":[43],"for":[44,137,158],"each":[45,159],"objective.":[46],"process":[50],"policies":[53,113],"that":[54],"optimize":[55],"objectives":[57,143],"simultaneously.":[58],"In":[59,127],"these":[60],"problems,":[61],"use":[63,90,111,163,169],"directional/gradient":[65],"information":[66,101,157,172],"can":[67],"useful":[69],"guide":[71],"exploration":[73,187],"better":[75,77],"and":[76,109,167,183,196,210,220],"behaviors.":[78],"However,":[79],"traditional":[80],"policy-gradient":[81],"approaches":[82],"have":[83,116],"two":[84,149],"main":[85,150],"drawbacks:":[86],"they":[87,110],"require":[88],"batch":[93],"episodes":[95],"properly":[97],"estimate":[98],"gradient":[100,156,200],"(reducing":[102],"this":[104,128,171],"way":[105],"speed),":[108],"stochastic":[112],"which":[114,140],"could":[115],"disastrous":[118],"impact":[119],"on":[120,203,218,221],"safety":[122],"system.":[126],"paper,":[129],"we":[130],"present":[131],"novel":[133],"population-based":[134],"algorithm":[136,193],"underlying":[142],"are":[144],"reasonably":[145],"smooth.":[146],"It":[147],"presents":[148],"characteristics:":[151],"fast":[152],"computation":[153],"objective":[160],"through":[161],"neighboring":[165],"solutions,":[166],"carry":[174],"out":[175],"geometric":[177],"partition":[178],"search":[181],"space":[182],"thus":[184],"direct":[185],"promising":[189],"areas.":[190],"Finally,":[191],"evaluated":[195],"compared":[197],"policy":[199],"algorithms":[202],"different":[204],"multi-objective":[205],"problems:":[206],"water":[208],"reservoir":[209],"biped":[212],"walking":[213],"problem":[214],"(the":[215],"latter":[216],"both":[217],"simulation":[219],"real":[223],"robot).":[224]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
