{"id":"https://openalex.org/W2914871904","doi":"https://doi.org/10.1109/humanoids.2018.8625003","title":"Learning Deep Robot Controllers by Exploiting Successful and Failed Executions","display_name":"Learning Deep Robot Controllers by Exploiting Successful and Failed Executions","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2914871904","doi":"https://doi.org/10.1109/humanoids.2018.8625003","mag":"2914871904"},"language":"en","primary_location":{"id":"doi:10.1109/humanoids.2018.8625003","is_oa":false,"landing_page_url":"https://doi.org/10.1109/humanoids.2018.8625003","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE-RAS 18th International Conference on Humanoid Robots (Humanoids)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084329458","display_name":"Domingo Esteban","orcid":"https://orcid.org/0000-0002-3134-0281"},"institutions":[{"id":"https://openalex.org/I83816512","display_name":"University of Genoa","ror":"https://ror.org/0107c5v14","country_code":"IT","type":"education","lineage":["https://openalex.org/I83816512"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Domingo Esteban","raw_affiliation_strings":["DIBRIS, Universita di Genova, Italy"],"affiliations":[{"raw_affiliation_string":"DIBRIS, Universita di Genova, Italy","institution_ids":["https://openalex.org/I83816512"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063761733","display_name":"Leonel Rozo","orcid":"https://orcid.org/0000-0001-5970-9135"},"institutions":[{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Leonel Rozo","raw_affiliation_strings":["Department of Advanced Robotics, Istituto Italiano di Tecnologia, Genova, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Advanced Robotics, Istituto Italiano di Tecnologia, Genova, Italy","institution_ids":["https://openalex.org/I30771326"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044243940","display_name":"Darwin G. Caldwell","orcid":"https://orcid.org/0000-0002-6233-9961"},"institutions":[{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Darwin G. Caldwell","raw_affiliation_strings":["Department of Advanced Robotics, Istituto Italiano di Tecnologia, Genova, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Advanced Robotics, Istituto Italiano di Tecnologia, Genova, Italy","institution_ids":["https://openalex.org/I30771326"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5084329458"],"corresponding_institution_ids":["https://openalex.org/I83816512"],"apc_list":null,"apc_paid":null,"fwci":0.3258,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.69679303,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"2","issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.7836641073226929},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7091418504714966},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.699856698513031},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6277602314949036},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5951522588729858},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5540744066238403},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5068379044532776},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.5030199885368347},{"id":"https://openalex.org/keywords/global-positioning-system","display_name":"Global Positioning System","score":0.49129074811935425},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.46975138783454895},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.10707437992095947}],"concepts":[{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.7836641073226929},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7091418504714966},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.699856698513031},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6277602314949036},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5951522588729858},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5540744066238403},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5068379044532776},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.5030199885368347},{"id":"https://openalex.org/C60229501","wikidata":"https://www.wikidata.org/wiki/Q18822","display_name":"Global Positioning System","level":2,"score":0.49129074811935425},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46975138783454895},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.10707437992095947},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/humanoids.2018.8625003","is_oa":false,"landing_page_url":"https://doi.org/10.1109/humanoids.2018.8625003","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE-RAS 18th International Conference on Humanoid Robots (Humanoids)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1499669280","https://openalex.org/W1771410628","https://openalex.org/W1923344279","https://openalex.org/W1977655452","https://openalex.org/W1985578768","https://openalex.org/W1991618009","https://openalex.org/W2001095967","https://openalex.org/W2012587148","https://openalex.org/W2087617385","https://openalex.org/W2121103318","https://openalex.org/W2126909264","https://openalex.org/W2136719407","https://openalex.org/W2167856595","https://openalex.org/W2287850282","https://openalex.org/W2295431040","https://openalex.org/W2413397042","https://openalex.org/W2443711627","https://openalex.org/W2528734395","https://openalex.org/W2529601334","https://openalex.org/W2601477941","https://openalex.org/W2604726708","https://openalex.org/W2963630259","https://openalex.org/W2963864421","https://openalex.org/W2964161785","https://openalex.org/W4205513846","https://openalex.org/W4297802064","https://openalex.org/W6629881138","https://openalex.org/W6638018090","https://openalex.org/W6678367057","https://openalex.org/W6684921986","https://openalex.org/W6696273291","https://openalex.org/W6725508859","https://openalex.org/W6735866614","https://openalex.org/W6736742618"],"related_works":["https://openalex.org/W2953205341","https://openalex.org/W235065745","https://openalex.org/W2029935773","https://openalex.org/W4377865163","https://openalex.org/W3193857078","https://openalex.org/W2888956734","https://openalex.org/W3000197790","https://openalex.org/W4315865067","https://openalex.org/W2979433843","https://openalex.org/W3208304128"],"abstract_inverted_index":{"The":[0],"prohibitively":[1],"amount":[2],"of":[3,49,90,101,121],"data":[4],"required":[5],"when":[6],"learning":[7,27,63,139],"complex":[8,65],"nonlinear":[9],"policies,":[10],"such":[11],"as":[12,46],"deep":[13,114],"neural":[14,115,168],"networks,":[15],"has":[16],"been":[17],"significantly":[18],"reduced":[19],"with":[20,118,128,175,203],"guided":[21,171],"policy":[22,73,138,183],"search":[23],"(GPS)algorithms.":[24],"However,":[25],"while":[26,62],"the":[28,31,71,88,99,137,142,179,182,204],"control":[29],"policy,":[30],"robot":[32,72],"might":[33],"fail":[34],"and":[35,54,94,186],"therefore":[36,95,187],"generate":[37],"unacceptable":[38],"guiding":[39],"samples.":[40],"Failures":[41],"may":[42,192],"arise,":[43],"for":[44],"example,":[45],"a":[47,64,194],"consequence":[48],"modeling":[50],"or":[51,75,206],"environmental":[52],"uncertainties,":[53],"thus":[55],"unsuccessful":[56,80],"interactions":[57,103],"should":[58],"be":[59],"explicitly":[60],"considered":[61],"policy.":[66],"Currently,":[67],"GPS":[68],"methods":[69,86],"update":[70],"discarding":[74],"giving":[76],"low":[77],"probability":[78],"to":[79,97,112,151],"trials.":[81],"In":[82,107],"other":[83],"words,":[84],"these":[85,102],"overlook":[87],"existence":[89],"poorly":[91],"performing":[92],"executions,":[93],"tend":[96],"underestimate":[98],"information":[100],"in":[104,197],"next":[105],"iterations.":[106],"this":[108],"paper":[109],"we":[110],"propose":[111],"learn":[113],"network":[116,169],"controllers":[117],"an":[119],"extension":[120],"G":[122],"PS":[123],"that":[124,141,167,199],"considers":[125],"trajectories":[126,173],"optimized":[127,174],"dualist":[129],"constraints.":[130],"These":[131],"constraints":[132],"are":[133,149],"aimed":[134],"at":[135,146],"assisting":[136],"so":[140],"trajectory":[143,153,161],"distributions":[144,154,162],"updated":[145],"each":[147],"iteration":[148],"similar":[150],"good":[152],"(e.g.,":[155],"sucessful":[156],"executions)while":[157],"differing":[158],"from":[159],"bad":[160],"(e.g.":[163],"failures).":[164],"We":[165],"show":[166],"policies":[170],"by":[172],"our":[176],"method":[177],"reduce":[178],"failures":[180],"during":[181],"exploration":[184],"phase,":[185],"encourage":[188],"safer":[189],"interactions.":[190],"This":[191],"have":[193],"relevant":[195],"impact":[196],"tasks":[198],"involve":[200],"physical":[201],"contact":[202],"environment":[205],"human":[207],"partners.":[208]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
