{"id":"https://openalex.org/W2970019863","doi":"https://doi.org/10.1109/bcd.2019.8885129","title":"Multi-task Deep Reinforcement Learning with Evolutionary Algorithm and Policy Gradients Method in 3D Control Tasks","display_name":"Multi-task Deep Reinforcement Learning with Evolutionary Algorithm and Policy Gradients Method in 3D Control Tasks","publication_year":2019,"publication_date":"2019-05-01","ids":{"openalex":"https://openalex.org/W2970019863","doi":"https://doi.org/10.1109/bcd.2019.8885129","mag":"2970019863"},"language":"en","primary_location":{"id":"doi:10.1109/bcd.2019.8885129","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bcd.2019.8885129","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data, Cloud Computing, Data Science &amp; Engineering (BCD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086371271","display_name":"Shota Imai","orcid":"https://orcid.org/0009-0005-2514-6449"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Shota Imai","raw_affiliation_strings":["Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045716284","display_name":"Yuichi Sei","orcid":"https://orcid.org/0000-0002-2552-6717"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuichi Sei","raw_affiliation_strings":["Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013743040","display_name":"Yasuyuki Tahara","orcid":"https://orcid.org/0000-0002-1939-4455"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yasuyuki Tahara","raw_affiliation_strings":["Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017186793","display_name":"Ryohei Orihara","orcid":"https://orcid.org/0000-0002-9039-7704"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryohei Orihara","raw_affiliation_strings":["Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013259601","display_name":"Akihiko Ohsuga","orcid":"https://orcid.org/0000-0001-6717-7028"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Akihiko Ohsuga","raw_affiliation_strings":["Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics and Engineering, University of Electro-Communications, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5086371271"],"corresponding_institution_ids":["https://openalex.org/I20529979"],"apc_list":null,"apc_paid":null,"fwci":0.14,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.56536319,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":"abs 1412 6980","issue":null,"first_page":"100","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9771999716758728,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9763000011444092,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8571482300758362},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7899916172027588},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6674097776412964},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6454778909683228},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6180282831192017},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5687664747238159},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5171846151351929},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5063966512680054},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.496345579624176},{"id":"https://openalex.org/keywords/evolutionary-algorithm","display_name":"Evolutionary algorithm","score":0.47381770610809326},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.4566454291343689}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8571482300758362},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7899916172027588},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6674097776412964},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6454778909683228},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6180282831192017},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5687664747238159},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5171846151351929},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5063966512680054},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.496345579624176},{"id":"https://openalex.org/C159149176","wikidata":"https://www.wikidata.org/wiki/Q14489129","display_name":"Evolutionary algorithm","level":2,"score":0.47381770610809326},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.4566454291343689},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bcd.2019.8885129","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bcd.2019.8885129","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data, Cloud Computing, Data Science &amp; Engineering (BCD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1515851193","https://openalex.org/W1522301498","https://openalex.org/W1557517019","https://openalex.org/W1757796397","https://openalex.org/W1821462560","https://openalex.org/W2060277733","https://openalex.org/W2099397840","https://openalex.org/W2111935653","https://openalex.org/W2132083787","https://openalex.org/W2137983211","https://openalex.org/W2145339207","https://openalex.org/W2148520070","https://openalex.org/W2155968351","https://openalex.org/W2163605009","https://openalex.org/W2165150801","https://openalex.org/W2171658832","https://openalex.org/W2173248099","https://openalex.org/W2257979135","https://openalex.org/W2293467699","https://openalex.org/W2342662072","https://openalex.org/W2443711627","https://openalex.org/W2583761661","https://openalex.org/W2596367596","https://openalex.org/W2601066903","https://openalex.org/W2604763608","https://openalex.org/W2735995851","https://openalex.org/W2754517384","https://openalex.org/W2786036274","https://openalex.org/W2787938642","https://openalex.org/W2894662639","https://openalex.org/W2898585845","https://openalex.org/W2908064123","https://openalex.org/W2951799422","https://openalex.org/W2963199420","https://openalex.org/W2963390419","https://openalex.org/W2963477884","https://openalex.org/W2963864421","https://openalex.org/W2964161785","https://openalex.org/W2990747716","https://openalex.org/W3037207827","https://openalex.org/W3103780890","https://openalex.org/W3146803896","https://openalex.org/W4298857966","https://openalex.org/W4302570325","https://openalex.org/W6637967152","https://openalex.org/W6638523607","https://openalex.org/W6674944505","https://openalex.org/W6681978006","https://openalex.org/W6684191040","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6704571135","https://openalex.org/W6732467815","https://openalex.org/W6735641298","https://openalex.org/W6736057607","https://openalex.org/W6740879895","https://openalex.org/W6744123322","https://openalex.org/W6748839928","https://openalex.org/W6755459763","https://openalex.org/W6755903938","https://openalex.org/W6770858630","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W217960748","https://openalex.org/W4306904969","https://openalex.org/W3125814499","https://openalex.org/W2090827041","https://openalex.org/W2094012830","https://openalex.org/W187246281"],"abstract_inverted_index":{"In":[0,86,120,146],"deep":[1],"reinforcement":[2],"learning,":[3],"it":[4,31],"is":[5,12,17,32,173],"difficult":[6],"to":[7,35,94,134],"converge":[8],"when":[9],"the":[10,23,47,109,147,157,164,171,175,180],"exploration":[11,26],"insufficient":[13],"or":[14],"a":[15,91,96,128,136],"reward":[16],"sparse.":[18],"Besides,":[19],"on":[20,37,46,63,83,102,163],"specific":[21],"tasks,":[22,166],"amount":[24],"of":[25,80,104,131,179],"may":[27],"be":[28],"limited.":[29],"Therefore,":[30],"considered":[33],"effective":[34,170],"learn":[36],"source":[38,154,165],"tasks":[39,106,178],"that":[40,59,98],"were":[41],"previously":[42],"for":[43,56,174],"promoting":[44],"learning":[45,57,62],"target":[48,105,181],"tasks.":[49,65,155,182],"Existing":[50],"researches":[51],"have":[52],"proposed":[53],"pretraining":[54,92],"methods":[55,68],"parameters":[58],"enable":[60],"fast":[61],"multiple":[64,125,151],"However,":[66],"these":[67],"are":[69],"still":[70],"limited":[71],"by":[72],"several":[73],"problems,":[74],"such":[75],"as":[76],"sparse":[77],"reward,":[78],"deviation":[79],"samples,":[81],"dependence":[82],"initial":[84],"parameters.":[85],"this":[87,121],"research,":[88],"we":[89,149,167],"propose":[90],"method":[93,162],"train":[95,135],"model":[97,138,158,172],"can":[99],"work":[100],"well":[101],"variety":[103],"and":[107,116,142],"solve":[108],"above":[110],"problems":[111],"with":[112,127,139,160],"an":[113],"evolutionary":[114,140],"algorithm":[115,141],"policy":[117,143],"gradients":[118,144],"method.":[119,145],"method,":[122],"agents":[123],"explore":[124],"environments":[126],"diverse":[129],"set":[130],"neural":[132],"networks":[133],"general":[137],"experiments,":[148],"assume":[150],"3D":[152,176],"control":[153,177],"After":[156],"training":[159],"our":[161],"show":[168],"how":[169]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}