{"id":"https://openalex.org/W4383109187","doi":"https://doi.org/10.1109/icra48891.2023.10161058","title":"Guiding Reinforcement Learning with Shared Control Templates","display_name":"Guiding Reinforcement Learning with Shared Control Templates","publication_year":2023,"publication_date":"2023-05-29","ids":{"openalex":"https://openalex.org/W4383109187","doi":"https://doi.org/10.1109/icra48891.2023.10161058"},"language":"en","primary_location":{"id":"doi:10.1109/icra48891.2023.10161058","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icra48891.2023.10161058","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016248595","display_name":"Abhishek Padalkar","orcid":null},"institutions":[{"id":"https://openalex.org/I2898391981","display_name":"Deutsches Zentrum f\u00fcr Luft- und Raumfahrt e. V. (DLR)","ror":"https://ror.org/04bwf3e34","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2898391981"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Abhishek Padalkar","raw_affiliation_strings":["German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234","institution_ids":["https://openalex.org/I2898391981"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055668072","display_name":"Gabriel Quere","orcid":"https://orcid.org/0000-0002-1788-3685"},"institutions":[{"id":"https://openalex.org/I2898391981","display_name":"Deutsches Zentrum f\u00fcr Luft- und Raumfahrt e. V. (DLR)","ror":"https://ror.org/04bwf3e34","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2898391981"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gabriel Quere","raw_affiliation_strings":["German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234","institution_ids":["https://openalex.org/I2898391981"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077188472","display_name":"Franz Steinmetz","orcid":"https://orcid.org/0000-0002-1481-9215"},"institutions":[{"id":"https://openalex.org/I2898391981","display_name":"Deutsches Zentrum f\u00fcr Luft- und Raumfahrt e. V. (DLR)","ror":"https://ror.org/04bwf3e34","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2898391981"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Franz Steinmetz","raw_affiliation_strings":["German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234","institution_ids":["https://openalex.org/I2898391981"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013236820","display_name":"Antonin Raffin","orcid":"https://orcid.org/0000-0001-6036-6950"},"institutions":[{"id":"https://openalex.org/I2898391981","display_name":"Deutsches Zentrum f\u00fcr Luft- und Raumfahrt e. V. (DLR)","ror":"https://ror.org/04bwf3e34","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2898391981"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Antonin Raffin","raw_affiliation_strings":["German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234","institution_ids":["https://openalex.org/I2898391981"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012849051","display_name":"Matthias Nieuwenhuisen","orcid":"https://orcid.org/0000-0002-3706-592X"},"institutions":[{"id":"https://openalex.org/I4210166245","display_name":"Fraunhofer Institute for Communication, Information Processing and Ergonomics","ror":"https://ror.org/05nn0gw40","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210166245","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Matthias Nieuwenhuisen","raw_affiliation_strings":["Fraunhofer Institute for Communication, Information Processing and Ergonomics FKIE,Wachtberg,53343"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fraunhofer Institute for Communication, Information Processing and Ergonomics FKIE,Wachtberg,53343","institution_ids":["https://openalex.org/I4210166245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012562539","display_name":"Jo\u00e3o Silv\u00e9rio","orcid":"https://orcid.org/0000-0003-1428-8933"},"institutions":[{"id":"https://openalex.org/I2898391981","display_name":"Deutsches Zentrum f\u00fcr Luft- und Raumfahrt e. V. (DLR)","ror":"https://ror.org/04bwf3e34","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2898391981"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jo\u00e3o Silv\u00e9rio","raw_affiliation_strings":["German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234","institution_ids":["https://openalex.org/I2898391981"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017689065","display_name":"Freek Stulp","orcid":"https://orcid.org/0000-0001-9555-9517"},"institutions":[{"id":"https://openalex.org/I2898391981","display_name":"Deutsches Zentrum f\u00fcr Luft- und Raumfahrt e. V. (DLR)","ror":"https://ror.org/04bwf3e34","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I2898391981"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Freek Stulp","raw_affiliation_strings":["German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"German Aerospace Center (DLR), Robotics and Mechatronics Center (RMC),We&#x00DF;ling,Germany,82234","institution_ids":["https://openalex.org/I2898391981"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5016248595"],"corresponding_institution_ids":["https://openalex.org/I2898391981"],"apc_list":null,"apc_paid":null,"fwci":0.352,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.57061211,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9408000111579895,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8437467813491821},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7390144467353821},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.7355955839157104},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.6957453489303589},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6934680938720703},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.587946355342865},{"id":"https://openalex.org/keywords/template","display_name":"Template","score":0.5650699734687805},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5450502038002014},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5421996116638184},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5370551347732544},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4525713622570038},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3813588619232178},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1164882481098175},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.09253886342048645}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8437467813491821},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7390144467353821},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.7355955839157104},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.6957453489303589},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6934680938720703},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.587946355342865},{"id":"https://openalex.org/C82714645","wikidata":"https://www.wikidata.org/wiki/Q438331","display_name":"Template","level":2,"score":0.5650699734687805},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5450502038002014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5421996116638184},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5370551347732544},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4525713622570038},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3813588619232178},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1164882481098175},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.09253886342048645},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icra48891.2023.10161058","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icra48891.2023.10161058","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:elib.dlr.de:193739","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ICRA48891.2023.10161058>.","pdf_url":null,"source":{"id":"https://openalex.org/S4377196266","display_name":"elib (German Aerospace Center)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2898391981","host_organization_name":"Deutsches Zentrum f\u00fcr Luft- und Raumfahrt e. V. (DLR)","host_organization_lineage":["https://openalex.org/I2898391981"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},{"id":"pmh:oai:publica.fraunhofer.de:publica/488303","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/488303","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference paper"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6600000262260437,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G3663555733","display_name":"VERtical Innovation in the Domain of Robotics Enabled by Artificial intelligence Methods","funder_award_id":"951992","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7842005466","display_name":null,"funder_award_id":"Horizon 2020","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G923131758","display_name":null,"funder_award_id":"329551904","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320324729","display_name":"Universit\u00e4t Bremen","ror":"https://ror.org/04ers2y35"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1777239053","https://openalex.org/W2089145243","https://openalex.org/W2105660272","https://openalex.org/W2151992157","https://openalex.org/W2257979135","https://openalex.org/W2781726626","https://openalex.org/W2914688076","https://openalex.org/W2942608247","https://openalex.org/W2962736495","https://openalex.org/W2966735560","https://openalex.org/W2990747716","https://openalex.org/W2996196387","https://openalex.org/W3014728935","https://openalex.org/W3037164854","https://openalex.org/W3042273483","https://openalex.org/W3091182126","https://openalex.org/W3093922502","https://openalex.org/W3104876774","https://openalex.org/W3129731039","https://openalex.org/W3134383769","https://openalex.org/W3205212549","https://openalex.org/W3205225162","https://openalex.org/W3208165232","https://openalex.org/W3212580602","https://openalex.org/W3216772467","https://openalex.org/W4206268535","https://openalex.org/W4288363736","https://openalex.org/W4298857966","https://openalex.org/W6637967152","https://openalex.org/W6638088447","https://openalex.org/W6747473740","https://openalex.org/W6772100842","https://openalex.org/W6784062865","https://openalex.org/W6801971982"],"related_works":["https://openalex.org/W2953205341","https://openalex.org/W2092643327","https://openalex.org/W235065745","https://openalex.org/W2029935773","https://openalex.org/W2787754950","https://openalex.org/W1572215850","https://openalex.org/W1985775355","https://openalex.org/W2352115286","https://openalex.org/W4256172809","https://openalex.org/W2084793300"],"abstract_inverted_index":{"Purposeful":[0],"interaction":[1],"with":[2,130],"objects":[3],"usually":[4],"requires":[5,152],"certain":[6],"constraints":[7,23,34,99],"to":[8,16,56,63],"be":[9,37,71],"respected,":[10],"e.g.":[11],"keeping":[12],"a":[13,32,138,145],"bottle":[14],"upright":[15],"avoid":[17],"spilling.":[18],"In":[19],"reinforcement":[20],"learning,":[21],"such":[22],"are":[24,91],"typically":[25],"encoded":[26],"in":[27,84,104,137,141,156],"the":[28,47,58,61,67,105,110,113,120,150],"reward":[29,106,114,132],"function.":[30,115],"As":[31],"consequence,":[33],"can":[35],"only":[36,153],"learned":[38],"by":[39],"violating":[40],"them.":[41],"This":[42],"often":[43],"precludes":[44],"learning":[45,69,125,149],"on":[46,144],"physical":[48],"robot,":[49,147],"as":[50],"it":[51],"may":[52,70],"take":[53],"many":[54],"trials":[55],"learn":[57],"constraints,":[59],"and":[60,143],"necessity":[62],"violate":[64],"them":[65],"during":[66],"trial-and-error":[68],"unsafe.":[72],"We":[73,134],"have":[74],"serendipitously":[75],"discovered":[76],"that":[77],"constraint":[78,127],"representations":[79],"for":[80,94],"shared":[81],"control":[82],"-":[83,90],"particular":[85],"Shared":[86],"Control":[87],"Templates":[88],"(SCTs)":[89],"ideally":[92],"suited":[93],"safely":[95],"guiding":[96],"RL.":[97],"Representing":[98],"explicitly,":[100],"rather":[101],"than":[102],"implicitly":[103],"function,":[107],"also":[108],"simplifies":[109],"design":[111],"of":[112,119],"The":[116],"main":[117],"advantage":[118],"approach":[121],"is":[122],"safer,":[123],"faster":[124],"without":[126],"violations":[128],"(even":[129],"sparse":[131],"functions).":[133],"demonstrate":[135],"this":[136],"pouring":[139],"task":[140,151],"simulation":[142],"real":[146],"where":[148],"65":[154],"episodes":[155],"16":[157],"minutes.":[158]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-15T08:27:34.491423","created_date":"2025-10-10T00:00:00"}
