{"id":"https://openalex.org/W4287237766","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892901","title":"Improving Robustness of Deep Reinforcement Learning Agents: Environment Attack based on the Critic Network","display_name":"Improving Robustness of Deep Reinforcement Learning Agents: Environment Attack based on the Critic Network","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4287237766","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892901"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn55064.2022.9892901","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892901","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2104.03154","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034025046","display_name":"Lucas Schott","orcid":"https://orcid.org/0000-0001-7946-9685"},"institutions":[{"id":"https://openalex.org/I3018083178","display_name":"Institut de Recherche Technologique SystemX","ror":"https://ror.org/03crmsn52","country_code":"FR","type":"facility","lineage":["https://openalex.org/I3018083178"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Lucas Schott","raw_affiliation_strings":["IRT SystemX, ISIR, Sorbonne University,Palaiseau,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRT SystemX, ISIR, Sorbonne University,Palaiseau,France","institution_ids":["https://openalex.org/I39804081","https://openalex.org/I3018083178"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074073956","display_name":"Hatem Hajri","orcid":null},"institutions":[{"id":"https://openalex.org/I3018083178","display_name":"Institut de Recherche Technologique SystemX","ror":"https://ror.org/03crmsn52","country_code":"FR","type":"facility","lineage":["https://openalex.org/I3018083178"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Hatem Hajri","raw_affiliation_strings":["IRT SystemX,Palaiseau,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRT SystemX,Palaiseau,France","institution_ids":["https://openalex.org/I3018083178"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031635996","display_name":"Sylvain Lamprier","orcid":"https://orcid.org/0000-0002-2508-922X"},"institutions":[{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]},{"id":"https://openalex.org/I4210150358","display_name":"Institut Syst\u00e8mes Intelligents et de Robotique","ror":"https://ror.org/05neq8668","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I154526488","https://openalex.org/I39804081","https://openalex.org/I4210150358","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Sylvain Lamprier","raw_affiliation_strings":["ISIR, Sorbonne University,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ISIR, Sorbonne University,Paris,France","institution_ids":["https://openalex.org/I39804081","https://openalex.org/I4210150358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06122449,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9394000172615051,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9394000172615051,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.024700000882148743,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.003100000089034438,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8978843688964844},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.828016996383667},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7992738485336304},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7401283383369446},{"id":"https://openalex.org/keywords/adversary","display_name":"Adversary","score":0.7346985936164856},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7310187816619873},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6998901963233948},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5369223356246948},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4542664587497711},{"id":"https://openalex.org/keywords/error-driven-learning","display_name":"Error-driven learning","score":0.4324207901954651},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4254147708415985},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.20220494270324707},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09884610772132874}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8978843688964844},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.828016996383667},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7992738485336304},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7401283383369446},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.7346985936164856},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7310187816619873},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6998901963233948},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5369223356246948},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4542664587497711},{"id":"https://openalex.org/C47932503","wikidata":"https://www.wikidata.org/wiki/Q5395689","display_name":"Error-driven learning","level":3,"score":0.4324207901954651},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4254147708415985},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.20220494270324707},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09884610772132874},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ijcnn55064.2022.9892901","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892901","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"doi:10.48550/arxiv.2104.03154","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.03154","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2104.03154","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.03154","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320333573","display_name":"Emerging Pathogens Institute, University of Florida","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1542941925","https://openalex.org/W2076337359","https://openalex.org/W2180612164","https://openalex.org/W2897889564","https://openalex.org/W2962755762","https://openalex.org/W2981396729","https://openalex.org/W2998116579","https://openalex.org/W3108034711","https://openalex.org/W6637967152","https://openalex.org/W6640425456","https://openalex.org/W6681187623","https://openalex.org/W6696772115","https://openalex.org/W6733049761","https://openalex.org/W6735677848","https://openalex.org/W6741002519","https://openalex.org/W6755038706","https://openalex.org/W6770271268"],"related_works":["https://openalex.org/W10379689","https://openalex.org/W868042","https://openalex.org/W5991403","https://openalex.org/W8539471","https://openalex.org/W9657784","https://openalex.org/W14942622","https://openalex.org/W2683128","https://openalex.org/W929682","https://openalex.org/W3569651","https://openalex.org/W8698196"],"abstract_inverted_index":{"To":[0],"improve":[1],"robustness":[2,172],"of":[3,10,17,20,25,70,103,110,113,134,138,146,152,173,179],"deep":[4],"reinforcement":[5,35],"learning":[6,36,154],"agents,":[7],"a":[8,44,55],"line":[9],"recent":[11],"works":[12],"focus":[13],"on":[14,85,99],"producing":[15],"disturbances":[16,31,109],"the":[18,21,26,41,48,61,68,71,76,100,104,111,114,132,135,139,144,147,153,174,180],"dynamics":[19,69,112],"environment.":[22,115],"Existing":[23],"approaches":[24],"literature":[27],"to":[28,53,66,74,83,106,141,167],"generate":[29],"such":[30],"are":[32],"environment":[33,73],"adversarial":[34,87],"methods.":[37],"These":[38],"methods":[39,178],"set":[40],"problem":[42],"as":[43,125],"two-player":[45],"game":[46],"between":[47],"protagonist":[49,77,105],"agent,":[50,63,121],"which":[51,64,122],"learns":[52,65],"perform":[54],"task":[56,148],"in":[57,171],"an":[58,119],"environment,":[59],"and":[60,128,164],"adversary":[62,120],"disturb":[67],"considered":[72],"make":[75],"agent":[78],"fail.":[79],"Alternatively,":[80],"we":[81,97,130],"propose":[82],"build":[84],"gradient-based":[86],"attacks,":[88],"usually":[89,123],"used":[90],"for":[91,94],"classification":[92],"tasks":[93],"instance,":[95],"that":[96,158],"apply":[98],"critic":[101,136],"network":[102,137],"identify":[107],"efficient":[108],"Rather":[116],"than":[117,176],"training":[118],"reveals":[124],"very":[126],"complex":[127],"unstable,":[129],"leverage":[131],"knowledge":[133],"protagonist,":[140],"dynamically":[142],"increase":[143],"complexity":[145],"at":[149],"each":[150],"step":[151],"process.":[155],"We":[156],"show":[157],"our":[159],"method,":[160],"while":[161],"being":[162],"faster":[163],"lighter,":[165],"leads":[166],"significantly":[168],"better":[169],"improvements":[170],"policy":[175],"existing":[177],"literature.":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2022-07-25T00:00:00"}
