{"id":"https://openalex.org/W2895806276","doi":"https://doi.org/10.1109/ijcnn.2018.8489712","title":"Accelerating Deep Continuous Reinforcement Learning through Task Simplification","display_name":"Accelerating Deep Continuous Reinforcement Learning through Task Simplification","publication_year":2018,"publication_date":"2018-07-01","ids":{"openalex":"https://openalex.org/W2895806276","doi":"https://doi.org/10.1109/ijcnn.2018.8489712","mag":"2895806276"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2018.8489712","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2018.8489712","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040045142","display_name":"Matthias Kerzel","orcid":"https://orcid.org/0000-0002-1378-0435"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Matthias Kerzel","raw_affiliation_strings":["Knowledge Technology, University of Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056889683","display_name":"Hadi Beik-Mohammadi","orcid":"https://orcid.org/0000-0002-8170-2471"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hadi Beik Mohammadi","raw_affiliation_strings":["Knowledge Technology, University of Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101942305","display_name":"Mohammad Ali Zamani","orcid":"https://orcid.org/0000-0001-5350-645X"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Mohammad Ali Zamani","raw_affiliation_strings":["Knowledge Technology, University of Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033486668","display_name":"Stefan Wermter","orcid":"https://orcid.org/0000-0003-1343-4775"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Wermter","raw_affiliation_strings":["Knowledge Technology, University of Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5040045142"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"],"apc_list":null,"apc_paid":null,"fwci":1.3522,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86114752,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"518","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9509000182151794,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8037571907043457},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.754660964012146},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6523547172546387},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5278931856155396},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.328289270401001},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10115882754325867}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8037571907043457},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.754660964012146},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6523547172546387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5278931856155396},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.328289270401001},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10115882754325867},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn.2018.8489712","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2018.8489712","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W620953717","https://openalex.org/W1682403713","https://openalex.org/W2038476873","https://openalex.org/W2048226872","https://openalex.org/W2101539915","https://openalex.org/W2105899777","https://openalex.org/W2129403174","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2165150801","https://openalex.org/W2173248099","https://openalex.org/W2201581102","https://openalex.org/W2201912979","https://openalex.org/W2271840356","https://openalex.org/W2403087184","https://openalex.org/W2443711627","https://openalex.org/W2512143431","https://openalex.org/W2767077819","https://openalex.org/W2795520063","https://openalex.org/W2950471160","https://openalex.org/W2962736495","https://openalex.org/W2963477884","https://openalex.org/W2963864421","https://openalex.org/W2964161785","https://openalex.org/W4229950067","https://openalex.org/W4233071312","https://openalex.org/W4245108548","https://openalex.org/W4302570325","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6687681856","https://openalex.org/W6694517276","https://openalex.org/W6696324988","https://openalex.org/W6697071109","https://openalex.org/W6745666072"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4306904969","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W3196817267","https://openalex.org/W1976600725"],"abstract_inverted_index":{"Robotic":[0],"motor":[1],"policies":[2],"can,":[3],"in":[4,24],"theory,":[5],"be":[6],"learned":[7,69],"via":[8],"deep":[9,150],"continuous":[10],"reinforcement":[11,151],"learning.":[12,139],"In":[13],"practice,":[14],"however,":[15],"collecting":[16],"the":[17,28,45,53,71,85,105,113,119,126,130,136,146],"enormous":[18],"amount":[19],"of":[20,30,138,149],"required":[21],"training":[22,84,89,107],"samples":[23],"realistic":[25,147],"time,":[26],"surpasses":[27],"possibilities":[29],"many":[31],"robotic":[32,154],"platforms.":[33,155],"To":[34],"address":[35],"this":[36,141],"problem,":[37],"we":[38,143],"propose":[39],"a":[40,64,100],"novel":[41],"method":[42],"for":[43],"accelerating":[44],"learning":[46,127,132,152],"process":[47,128],"by":[48,52,82],"task":[49,66],"simplification":[50,79],"inspired":[51],"Goldilocks":[54],"effect":[55],"known":[56],"from":[57,129],"developmental":[58],"psychology.":[59],"We":[60,98,109],"present":[61],"results":[62],"on":[63,153],"reach-for-grasp":[65],"that":[67,91],"is":[68,80],"with":[70,87,116],"Deep":[72],"Deterministic":[73],"Policy":[74],"Gradients":[75],"(DDPG)":[76],"algorithm.":[77],"Task":[78],"realized":[81],"initially":[83],"system":[86],"\u201clarger-than-life\u201d":[88],"objects":[90],"adapt":[92],"their":[93],"reachability":[94],"dynamically":[95],"during":[96,125],"training.":[97],"achieve":[99],"significant":[101],"acceleration":[102],"compared":[103],"to":[104,112,118,122],"unaltered":[106],"setup.":[108],"describe":[110],"modifications":[111],"DDPG":[114],"algorithm":[115],"regard":[117],"replay":[120],"buffer":[121],"prevent":[123],"artifacts":[124],"simplified":[131],"instances":[133],"while":[134],"maintaining":[135],"speed":[137],"With":[140],"result,":[142],"contribute":[144],"towards":[145],"application":[148]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
