{"id":"https://openalex.org/W4205158356","doi":"https://doi.org/10.1109/tnnls.2021.3133537","title":"Adversary Agnostic Robust Deep Reinforcement Learning","display_name":"Adversary Agnostic Robust Deep Reinforcement Learning","publication_year":2021,"publication_date":"2021-12-22","ids":{"openalex":"https://openalex.org/W4205158356","doi":"https://doi.org/10.1109/tnnls.2021.3133537","pmid":"https://pubmed.ncbi.nlm.nih.gov/34936559"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2021.3133537","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3133537","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Xinghua Qu","orcid":"https://orcid.org/0000-0001-8072-2019"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinghua Qu","raw_affiliation_strings":["Bytedance AI Laboratory, Speech and Audio Team, Singapore"],"raw_orcid":"https://orcid.org/0000-0001-8072-2019","affiliations":[{"raw_affiliation_string":"Bytedance AI Laboratory, Speech and Audio Team, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Abhishek Gupta","orcid":"https://orcid.org/0000-0002-6080-855X"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I4210091207","display_name":"Singapore Institute of Manufacturing Technology","ror":"https://ror.org/00f44np30","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I4210091207","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Abhishek Gupta","raw_affiliation_strings":["A&#x002A;STAR, Singapore Institute of Manufacturing Technology (SIMTech), Singapore"],"raw_orcid":"https://orcid.org/0000-0002-6080-855X","affiliations":[{"raw_affiliation_string":"A&#x002A;STAR, Singapore Institute of Manufacturing Technology (SIMTech), Singapore","institution_ids":["https://openalex.org/I115228651","https://openalex.org/I4210091207"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yew-Soon Ong","orcid":"https://orcid.org/0000-0002-4480-169X"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yew-Soon Ong","raw_affiliation_strings":["Data Science and Artificial Intelligence Research Centre, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-4480-169X","affiliations":[{"raw_affiliation_string":"Data Science and Artificial Intelligence Research Centre, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":null,"display_name":"Zhu Sun","orcid":"https://orcid.org/0000-0002-3350-7022"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Zhu Sun","raw_affiliation_strings":["A&#x002A;STAR, Institute of High Performance Computing, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-3350-7022","affiliations":[{"raw_affiliation_string":"A&#x002A;STAR, Institute of High Performance Computing, Singapore","institution_ids":["https://openalex.org/I115228651","https://openalex.org/I3004594783"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3902,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.70851857,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"34","issue":"9","first_page":"6146","last_page":"6157"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.00019999999494757503,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.00019999999494757503,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.8285999894142151},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6398000121116638},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5727999806404114},{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.5343000292778015},{"id":"https://openalex.org/keywords/adversary","display_name":"Adversary","score":0.4884999990463257},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.39590001106262207}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.8285999894142151},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6712999939918518},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6398000121116638},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5727999806404114},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.5343000292778015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5336999893188477},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.4884999990463257},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.41510000824928284},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.39590001106262207},{"id":"https://openalex.org/C2986577269","wikidata":"https://www.wikidata.org/wiki/Q11306265","display_name":"Random noise","level":2,"score":0.3882000148296356},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3677999973297119},{"id":"https://openalex.org/C200331156","wikidata":"https://www.wikidata.org/wiki/Q506041","display_name":"Jacobian matrix and determinant","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.3027999997138977},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2646999955177307}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2021.3133537","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3133537","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:34936559","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34936559","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1496462336","https://openalex.org/W2112594540","https://openalex.org/W2124267516","https://openalex.org/W2145339207","https://openalex.org/W2180612164","https://openalex.org/W2761873684","https://openalex.org/W2773691349","https://openalex.org/W2941205169","https://openalex.org/W2963178695","https://openalex.org/W3007869767","https://openalex.org/W6640425456","https://openalex.org/W6683300800","https://openalex.org/W6685444567","https://openalex.org/W6732837357","https://openalex.org/W6733049761","https://openalex.org/W6735677848","https://openalex.org/W6737893269","https://openalex.org/W6739868092","https://openalex.org/W6740092555","https://openalex.org/W6746973458","https://openalex.org/W6747027214","https://openalex.org/W6752654261","https://openalex.org/W6755480234","https://openalex.org/W6758269446","https://openalex.org/W6759225949","https://openalex.org/W6763118746","https://openalex.org/W6766530665","https://openalex.org/W6770271268","https://openalex.org/W6772212738","https://openalex.org/W6774966973","https://openalex.org/W6784513241","https://openalex.org/W6788725866"],"related_works":[],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2,103],"(DRL)":[3],"policies":[4],"have":[5],"been":[6],"shown":[7],"to":[8,54,81,152,188,201,226],"be":[9,48,118],"deceived":[10],"by":[11,132,161],"perturbations":[12],"(e.g.,":[13],"random":[14],"noise":[15],"or":[16],"intensional":[17],"adversarial":[18,45,210],"attacks)":[19],"on":[20,58,125,214],"state":[21],"observations":[22,61],"that":[23,43,99,114,180,196],"appear":[24],"at":[25],"test":[26],"time":[27],"but":[28,74],"are":[29],"unknown":[30],"during":[31],"training.":[32],"To":[33,107],"increase":[34,202],"the":[35,51,87,122,155,158,162,166,169,182,189,203,209,220,227],"robustness":[36,70,115],"of":[37,84,121,157,184,222],"DRL":[38,97],"policies,":[39],"previous":[40],"approaches":[41,66],"assume":[42],"explicit":[44],"information":[46],"can":[47],"added":[49],"into":[50],"training":[52],"process,":[53],"achieve":[55],"generalization":[56],"ability":[57],"these":[59],"perturbed":[60],"as":[62],"well.":[63],"However,":[64],"such":[65],"not":[67,101],"only":[68],"make":[69],"improvement":[71],"more":[72],"expensive":[73],"may":[75],"also":[76],"leave":[77],"a":[78,126,137,145,174],"model":[79],"prone":[80],"other":[82],"kinds":[83],"attacks":[85],"in":[86],"wild.":[88],"In":[89],"contrast,":[90],"we":[91,110,135],"propose":[92,136],"an":[93],"adversary":[94],"agnostic":[95],"robust":[96],"paradigm":[98],"does":[100],"require":[102],"from":[104],"predefined":[105],"adversaries.":[106],"this":[108,133],"end,":[109],"first":[111],"theoretically":[112],"show":[113],"could":[116],"indeed":[117],"achieved":[119],"independently":[120],"adversaries":[123],"based":[124],"policy":[127,164],"distillation":[128,198],"(PD)":[129],"setting.":[130],"Motivated":[131],"finding,":[134],"new":[138],"PD":[139],"loss":[140,150,179,199],"with":[141,186],"two":[142],"terms:":[143],"1)":[144],"prescription":[146,204],"gap":[147,205],"maximization":[148],"(PGM)":[149],"aiming":[151],"simultaneously":[153],"maximize":[154],"likelihood":[156],"action":[159],"selected":[160],"teacher":[163],"and":[165,172,206],"entropy":[167],"over":[168],"remaining":[170],"actions":[171],"2)":[173],"corresponding":[175],"Jacobian":[176],"regularization":[177],"(JR)":[178],"minimizes":[181],"magnitude":[183],"gradients":[185],"respect":[187],"input":[190],"state.":[191],"The":[192],"theoretical":[193],"analysis":[194],"substantiates":[195],"our":[197,223],"guarantees":[200],"hence":[207],"improves":[208],"robustness.":[211],"Furthermore,":[212],"experiments":[213],"five":[215],"Atari":[216],"games":[217],"firmly":[218],"verify":[219],"superiority":[221],"approach":[224],"compared":[225],"state-of-the-art":[228],"baselines.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-07-22T07:51:19.307946","created_date":"2022-01-26T00:00:00"}