{"id":"https://openalex.org/W4387624000","doi":"https://doi.org/10.1109/lra.2023.3324590","title":"Trade-Off Between Robustness and Rewards Adversarial Training for Deep Reinforcement Learning Under Large Perturbations","display_name":"Trade-Off Between Robustness and Rewards Adversarial Training for Deep Reinforcement Learning Under Large Perturbations","publication_year":2023,"publication_date":"2023-10-13","ids":{"openalex":"https://openalex.org/W4387624000","doi":"https://doi.org/10.1109/lra.2023.3324590"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2023.3324590","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2023.3324590","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101479874","display_name":"Jeffrey Huang","orcid":"https://orcid.org/0009-0000-8358-5435"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey Huang","raw_affiliation_strings":["Department of Electrical and Systems Engineering, University of Pennsylvania, Philadelphia, PA, USA"],"raw_orcid":"https://orcid.org/0009-0000-8358-5435","affiliations":[{"raw_affiliation_string":"Department of Electrical and Systems Engineering, University of Pennsylvania, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108440699","display_name":"Ho Jin Choi","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ho Jin Choi","raw_affiliation_strings":["Department of Mechanical Engineering and Applied Mechanics, University of Pennsylvania, Philadelphia, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering and Applied Mechanics, University of Pennsylvania, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074348852","display_name":"Nadia Figueroa","orcid":"https://orcid.org/0000-0002-6873-4671"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nadia Figueroa","raw_affiliation_strings":["Department of Mechanical Engineering and Applied Mechanics, University of Pennsylvania, Philadelphia, PA, USA"],"raw_orcid":"https://orcid.org/0000-0002-6873-4671","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering and Applied Mechanics, University of Pennsylvania, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I79576946"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1632,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.56755763,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"8","issue":"12","first_page":"8018","last_page":"8025"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7219691276550293},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6585667133331299},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6370594501495361},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5941219925880432},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5652546882629395},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.44355711340904236},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39151135087013245}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7219691276550293},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6585667133331299},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6370594501495361},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5941219925880432},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5652546882629395},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.44355711340904236},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39151135087013245},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2023.3324590","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2023.3324590","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W1673923490","https://openalex.org/W1686810756","https://openalex.org/W1945616565","https://openalex.org/W2244449411","https://openalex.org/W2570685808","https://openalex.org/W2575705757","https://openalex.org/W2602963933","https://openalex.org/W2604505099","https://openalex.org/W2746553466","https://openalex.org/W2773525213","https://openalex.org/W2773691349","https://openalex.org/W2775482448","https://openalex.org/W2781726626","https://openalex.org/W2791953061","https://openalex.org/W2810785043","https://openalex.org/W2913266441","https://openalex.org/W2941205169","https://openalex.org/W2949103145","https://openalex.org/W2950300520","https://openalex.org/W2963062382","https://openalex.org/W2963403593","https://openalex.org/W2963431851","https://openalex.org/W2963969878","https://openalex.org/W2981344907","https://openalex.org/W2995442767","https://openalex.org/W2996343955","https://openalex.org/W2996344901","https://openalex.org/W3009542902","https://openalex.org/W3034215083","https://openalex.org/W3098237412","https://openalex.org/W3199735515","https://openalex.org/W3200885897","https://openalex.org/W3207730978","https://openalex.org/W3207958628","https://openalex.org/W4283329983","https://openalex.org/W4287637349","https://openalex.org/W4287692319","https://openalex.org/W4293846201","https://openalex.org/W4297824641","https://openalex.org/W4298857966","https://openalex.org/W6637162671","https://openalex.org/W6637373629","https://openalex.org/W6637967152","https://openalex.org/W6640425456","https://openalex.org/W6728252718","https://openalex.org/W6731927902","https://openalex.org/W6733049761","https://openalex.org/W6735677848","https://openalex.org/W6739868092","https://openalex.org/W6747027214","https://openalex.org/W6747473740","https://openalex.org/W6748965907","https://openalex.org/W6750404860","https://openalex.org/W6753243525","https://openalex.org/W6759129252","https://openalex.org/W6762427411","https://openalex.org/W6764339904","https://openalex.org/W6765966149","https://openalex.org/W6769596995","https://openalex.org/W6771690906","https://openalex.org/W6772461460","https://openalex.org/W6774469542","https://openalex.org/W6774966973","https://openalex.org/W6780559895","https://openalex.org/W6781541518","https://openalex.org/W6784493056","https://openalex.org/W6801180640","https://openalex.org/W6802920152","https://openalex.org/W6838933218"],"related_works":["https://openalex.org/W2502115930","https://openalex.org/W4246396837","https://openalex.org/W2482350142","https://openalex.org/W3176240006","https://openalex.org/W3126451824","https://openalex.org/W1561927205","https://openalex.org/W3191453585","https://openalex.org/W4297672492","https://openalex.org/W4288019534","https://openalex.org/W4310988119"],"abstract_inverted_index":{"Deep":[0],"Reinforcement":[1],"Learning":[2],"(DRL)":[3],"has":[4],"become":[5],"a":[6,69,116,148,153],"popular":[7],"approach":[8,78],"for":[9,74],"training":[10,72,128],"robots":[11],"due":[12],"to":[13,29,41,64,91,114,131,176],"its":[14,36],"generalization":[15],"promise,":[16],"complex":[17],"task":[18,151,163],"capacity":[19],"and":[20,50,67,86,120,134,147,179],"minimal":[21],"human":[22],"intervention.":[23],"Nevertheless,":[24],"DRL-trained":[25],"controllers":[26],"are":[27],"vulnerable":[28],"even":[30],"the":[31,58,83,87,111,160],"smallest":[32],"of":[33,60],"perturbations":[34,66,94,181],"on":[35,82,137],"inputs":[37],"which":[38],"can":[39],"lead":[40],"catastrophic":[42],"failures":[43],"in":[44,165],"real-world":[45,93],"human-centric":[46],"environments":[47],"with":[48,152,171],"large":[49,65],"unexpected":[51],"perturbations.":[52],"In":[53],"this":[54],"work,":[55],"we":[56,109,157],"study":[57],"vulnerability":[59],"state-of-the-art":[61],"DRL":[62,127,139],"subject":[63,175],"propose":[68],"novel":[70],"adversarial":[71,112],"framework":[73],"robust":[75,130],"control.":[76],"Our":[77],"generates":[79],"aggressive":[80,132],"attacks":[81,133],"state":[84],"space":[85],"expected":[88],"state-action":[89],"values":[90],"emulate":[92],"such":[95],"as":[96],"sensor":[97,177],"noise,":[98],"perception":[99],"failures,":[100],"physical":[101,180],"perturbations,":[102],"observations":[103],"mismatch,":[104],"etc.":[105],"To":[106],"achieve":[107],"this,":[108],"reformulate":[110],"risk":[113],"yield":[115],"trade-off":[117],"between":[118],"rewards":[119],"robustness":[121],"(TBRR).":[122],"We":[123],"show":[124,158],"that":[125,159],"TBRR-aided":[126],"is":[129],"outperforms":[135],"baselines":[136],"standard":[138],"benchmarks":[140],"(Cartpole,":[141],"Pendulum),":[142],"Meta-World":[143],"tasks":[144],"(door":[145],"manipulation)":[146],"vision-based":[149,161],"grasping":[150,162],"7DoF":[154],"manipulator.":[155],"Finally,":[156],"trained":[164],"simulation":[166],"via":[167],"TBRR":[168],"transfers":[169],"sim2real":[170],"70%":[172],"success":[173],"rate":[174],"impairment":[178],"without":[182],"any":[183],"retraining.":[184]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
