{"id":"https://openalex.org/W4312771252","doi":"https://doi.org/10.1109/iros47612.2022.9981185","title":"A Contact-Safe Reinforcement Learning Framework for Contact-Rich Robot Manipulation","display_name":"A Contact-Safe Reinforcement Learning Framework for Contact-Rich Robot Manipulation","publication_year":2022,"publication_date":"2022-10-23","ids":{"openalex":"https://openalex.org/W4312771252","doi":"https://doi.org/10.1109/iros47612.2022.9981185"},"language":"en","primary_location":{"id":"doi:10.1109/iros47612.2022.9981185","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros47612.2022.9981185","pdf_url":null,"source":{"id":"https://openalex.org/S4363607704","display_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005459431","display_name":"X. Zhu","orcid":"https://orcid.org/0009-0003-2742-3433"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiang Zhu","raw_affiliation_strings":["institute for interdisciplinary information sciences, Tsinghua University,Beijing,China","institute for interdisciplinary information sciences, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"institute for interdisciplinary information sciences, Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"institute for interdisciplinary information sciences, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016645045","display_name":"Shucheng Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shucheng Kang","raw_affiliation_strings":["institute for interdisciplinary information sciences, Tsinghua University,Beijing,China","institute for interdisciplinary information sciences, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"institute for interdisciplinary information sciences, Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"institute for interdisciplinary information sciences, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100611368","display_name":"Jianyu Chen","orcid":"https://orcid.org/0000-0002-6784-3809"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianyu Chen","raw_affiliation_strings":["Shanghai Qizhi Insitute,Shanghai,China","Shanghai Qizhi Insitute, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Qizhi Insitute,Shanghai,China","institution_ids":[]},{"raw_affiliation_string":"Shanghai Qizhi Insitute, Shanghai, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5005459431"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.415,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82355915,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2476","last_page":"2482"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11023","display_name":"Prosthetics and Rehabilitation Robotics","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10868","display_name":"Soft Robotics and Applications","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8341604471206665},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.7991769313812256},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7199569940567017},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6601086258888245},{"id":"https://openalex.org/keywords/collision","display_name":"Collision","score":0.6190105080604553},{"id":"https://openalex.org/keywords/collision-avoidance","display_name":"Collision avoidance","score":0.5744679570198059},{"id":"https://openalex.org/keywords/robot-end-effector","display_name":"Robot end effector","score":0.5389744639396667},{"id":"https://openalex.org/keywords/contact-force","display_name":"Contact force","score":0.5012059211730957},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.49580320715904236},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47874730825424194},{"id":"https://openalex.org/keywords/imperfect","display_name":"Imperfect","score":0.45715588331222534},{"id":"https://openalex.org/keywords/simulation","display_name":"Simulation","score":0.394045889377594},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.36459970474243164},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.22905272245407104},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.09016391634941101}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8341604471206665},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.7991769313812256},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7199569940567017},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6601086258888245},{"id":"https://openalex.org/C121704057","wikidata":"https://www.wikidata.org/wiki/Q352070","display_name":"Collision","level":2,"score":0.6190105080604553},{"id":"https://openalex.org/C2780864053","wikidata":"https://www.wikidata.org/wiki/Q5147495","display_name":"Collision avoidance","level":3,"score":0.5744679570198059},{"id":"https://openalex.org/C8652668","wikidata":"https://www.wikidata.org/wiki/Q1340324","display_name":"Robot end effector","level":3,"score":0.5389744639396667},{"id":"https://openalex.org/C81302111","wikidata":"https://www.wikidata.org/wiki/Q2916417","display_name":"Contact force","level":2,"score":0.5012059211730957},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.49580320715904236},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47874730825424194},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.45715588331222534},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.394045889377594},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36459970474243164},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.22905272245407104},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.09016391634941101},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros47612.2022.9981185","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros47612.2022.9981185","pdf_url":null,"source":{"id":"https://openalex.org/S4363607704","display_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1967377907","https://openalex.org/W1969583232","https://openalex.org/W1976030291","https://openalex.org/W2032381729","https://openalex.org/W2042803738","https://openalex.org/W2112474089","https://openalex.org/W2131292439","https://openalex.org/W2205340216","https://openalex.org/W2736601468","https://openalex.org/W2753738274","https://openalex.org/W2762248135","https://openalex.org/W2962736495","https://openalex.org/W2964067469","https://openalex.org/W2964333597","https://openalex.org/W2968268581","https://openalex.org/W2968748054","https://openalex.org/W3003629310","https://openalex.org/W3012211643","https://openalex.org/W3028308378","https://openalex.org/W3081917349","https://openalex.org/W3090211586","https://openalex.org/W3100172161","https://openalex.org/W3115157241","https://openalex.org/W3181385848","https://openalex.org/W3204951990","https://openalex.org/W3210663387","https://openalex.org/W4214717370","https://openalex.org/W6741002519","https://openalex.org/W6756303580","https://openalex.org/W6782539135","https://openalex.org/W6802602109"],"related_works":["https://openalex.org/W2374250903","https://openalex.org/W2889566344","https://openalex.org/W1546413948","https://openalex.org/W4317634134","https://openalex.org/W2981729160","https://openalex.org/W2570672926","https://openalex.org/W4378191988","https://openalex.org/W3210882399","https://openalex.org/W1984186897","https://openalex.org/W1976172674"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,52],"shows":[2],"great":[3],"potential":[4],"to":[5,87,97,105,113,126,143],"solve":[6],"complex":[7],"contact-rich":[8,55,107],"robot":[9,56,78,134],"manipulation":[10],"tasks.":[11],"However,":[12],"the":[13,19,33,63,70,77,81,90,94,101,118,127,145,158,169,172],"safety":[14,60],"of":[15],"using":[16],"RL":[17,34,71,119],"in":[18,41,61,121,150],"real":[20,128],"world":[21,131],"is":[22,36,85,103,141,160],"a":[23,49],"crucial":[24],"problem,":[25],"since":[26],"unexpected":[27,74,165],"dangerous":[28],"collisions":[29,75],"might":[30],"happen":[31],"when":[32,157],"policy":[35,72,120,159],"imperfect":[37],"during":[38],"training":[39],"or":[40],"unseen":[42,162],"scenarios.":[43],"In":[44],"this":[45],"paper,":[46],"we":[47],"propose":[48],"contact-safe":[50],"reinforcement":[51],"framework":[53,84],"for":[54],"manipulation,":[57],"which":[58],"maintains":[59],"both":[62,149],"task":[64,151],"space":[65,152,155],"and":[66,80,92,123,153],"joint":[67,154],"space.":[68],"When":[69],"causes":[73],"between":[76],"arm":[79],"environment,":[82],"our":[83,139],"able":[86,142],"immediately":[88],"detect":[89],"collision":[91],"ensure":[93],"contact":[95,146],"force":[96,147],"be":[98],"small.":[99],"Furthermore,":[100],"end-effector":[102],"enforced":[104],"perform":[106],"tasks":[108,136],"compliantly,":[109],"while":[110,167],"keeping":[111],"robust":[112],"external":[114],"disturbances.":[115],"We":[116],"train":[117],"simulation":[122],"transfer":[124],"it":[125],"robot.":[129],"Real":[130],"experiments":[132],"on":[133,171],"wiping":[135],"show":[137],"that":[138],"method":[140],"keep":[144],"small":[148],"even":[156],"under":[161],"scenario":[163],"with":[164],"collision,":[166],"rejecting":[168],"disturbances":[170],"main":[173],"task.":[174]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
