{"id":"https://openalex.org/W4414472164","doi":"https://doi.org/10.3390/make7040108","title":"Learning to Balance Mixed Adversarial Attacks for Robust Reinforcement Learning","display_name":"Learning to Balance Mixed Adversarial Attacks for Robust Reinforcement Learning","publication_year":2025,"publication_date":"2025-09-24","ids":{"openalex":"https://openalex.org/W4414472164","doi":"https://doi.org/10.3390/make7040108"},"language":"en","primary_location":{"id":"doi:10.3390/make7040108","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040108","pdf_url":"https://www.mdpi.com/2504-4990/7/4/108/pdf?version=1758716393","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/7/4/108/pdf?version=1758716393","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101916189","display_name":"Mustafa Erdem","orcid":"https://orcid.org/0000-0002-5228-1743"},"institutions":[{"id":"https://openalex.org/I4210101260","display_name":"T\u00fcrkisch-Deutsche Universit\u00e4t","ror":"https://ror.org/017bbc354","country_code":"TR","type":"education","lineage":["https://openalex.org/I4210101260"]},{"id":"https://openalex.org/I48912391","display_name":"Istanbul Technical University","ror":"https://ror.org/059636586","country_code":"TR","type":"education","lineage":["https://openalex.org/I48912391"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Mustafa Erdem","raw_affiliation_strings":["Department of Mechatronics Engineering, Istanbul Technical University, Maslak, 34467 Istanbul, T\u00fcrkiye","Department of Mechatronics Engineering, Turkish-German University, Beykoz, 34820 Istanbul, T\u00fcrkiye"],"affiliations":[{"raw_affiliation_string":"Department of Mechatronics Engineering, Istanbul Technical University, Maslak, 34467 Istanbul, T\u00fcrkiye","institution_ids":["https://openalex.org/I48912391"]},{"raw_affiliation_string":"Department of Mechatronics Engineering, Turkish-German University, Beykoz, 34820 Istanbul, T\u00fcrkiye","institution_ids":["https://openalex.org/I4210101260"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028990507","display_name":"Naz\u0131m Kemal \u00dcre","orcid":"https://orcid.org/0000-0003-2660-2141"},"institutions":[{"id":"https://openalex.org/I48912391","display_name":"Istanbul Technical University","ror":"https://ror.org/059636586","country_code":"TR","type":"education","lineage":["https://openalex.org/I48912391"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Naz\u0131m Kemal \u00dcre","raw_affiliation_strings":["Department of Artificial Intelligence and Data Engineering, Istanbul Technical University, Maslak, 34467 Istanbul, T\u00fcrkiye"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence and Data Engineering, Istanbul Technical University, Maslak, 34467 Istanbul, T\u00fcrkiye","institution_ids":["https://openalex.org/I48912391"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101916189"],"corresponding_institution_ids":["https://openalex.org/I4210101260","https://openalex.org/I48912391"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14056997,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"4","first_page":"108","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.9340999722480774},{"id":"https://openalex.org/keywords/adversary","display_name":"Adversary","score":0.7979999780654907},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.630299985408783},{"id":"https://openalex.org/keywords/compromise","display_name":"Compromise","score":0.6154999732971191},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6062999963760376},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4618000090122223},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4320000112056732},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4113999903202057}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.9340999722480774},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.7979999780654907},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7037000060081482},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.630299985408783},{"id":"https://openalex.org/C46355384","wikidata":"https://www.wikidata.org/wiki/Q726686","display_name":"Compromise","level":2,"score":0.6154999732971191},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6062999963760376},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4618000090122223},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45489999651908875},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4320000112056732},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4113999903202057},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.3813999891281128},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.36809998750686646},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3675000071525574},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.36329999566078186},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35499998927116394},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.320499986410141},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3197000026702881},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/make7040108","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040108","pdf_url":"https://www.mdpi.com/2504-4990/7/4/108/pdf?version=1758716393","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:30e5408bcf4440959801604ce00801d5","is_oa":true,"landing_page_url":"https://doaj.org/article/30e5408bcf4440959801604ce00801d5","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 7, Iss 4, p 108 (2025)","raw_type":"article"},{"id":"pmh:oai:polen.itu.edu.tr:11527/30315","is_oa":false,"landing_page_url":"https://hdl.handle.net/11527/30315","pdf_url":null,"source":{"id":"https://openalex.org/S4306400460","display_name":"Istanbul Technical University Academic Open Archive (Istanbul Technical University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I48912391","host_organization_name":"Istanbul Technical University","host_organization_lineage":["https://openalex.org/I48912391"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":{"id":"doi:10.3390/make7040108","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040108","pdf_url":"https://www.mdpi.com/2504-4990/7/4/108/pdf?version=1758716393","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4414472164.pdf","grobid_xml":"https://content.openalex.org/works/W4414472164.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W1965878388","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2158782408","https://openalex.org/W2257979135","https://openalex.org/W2575731723","https://openalex.org/W2605102758","https://openalex.org/W2733312032","https://openalex.org/W2773691349","https://openalex.org/W2967292964","https://openalex.org/W2997293639","https://openalex.org/W2998401161","https://openalex.org/W3036619998","https://openalex.org/W3098237412","https://openalex.org/W3103340107","https://openalex.org/W3164878287","https://openalex.org/W3182829817","https://openalex.org/W4214717370","https://openalex.org/W4221110788","https://openalex.org/W4225395035","https://openalex.org/W4287692319","https://openalex.org/W4305038385","https://openalex.org/W4384652388","https://openalex.org/W4385163974","https://openalex.org/W4387390100","https://openalex.org/W4390242578","https://openalex.org/W4390618715","https://openalex.org/W4395049132","https://openalex.org/W4400764196","https://openalex.org/W4400850547","https://openalex.org/W4402353522","https://openalex.org/W4404448300","https://openalex.org/W4404932791","https://openalex.org/W4405546960"],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"agents":[2,96],"are":[3,53],"highly":[4,104],"susceptible":[5],"to":[6,106,124,155],"adversarial":[7,16,185,205],"attacks":[8,30,102],"that":[9,95,193],"can":[10],"severely":[11],"compromise":[12],"their":[13],"performance.":[14],"Although":[15],"training":[17],"is":[18],"a":[19,76,112,118,157,181],"common":[20],"countermeasure,":[21],"most":[22,210],"existing":[23],"research":[24],"focuses":[25],"on":[26],"defending":[27],"against":[28,100,180],"single-type":[29,101,204],"targeting":[31],"either":[32],"observations":[33],"or":[34,99],"actions.":[35,89],"This":[36],"narrow":[37],"focus":[38],"overlooks":[39],"the":[40,63,73,80,134,142,153,174,177],"complexity":[41],"of":[42,176,184],"real-world":[43],"mixed":[44,107],"attacks,":[45],"where":[46],"an":[47,83],"agent\u2019s":[48,135],"perceptions":[49],"and":[50,65,82,88,144,167,203],"resulting":[51],"actions":[52],"perturbed":[54],"simultaneously.":[55],"To":[56,137],"systematically":[57],"study":[58],"these":[59],"threats,":[60],"we":[61,93,110,140],"introduce":[62],"Action":[64,143],"State-Adversarial":[66,145],"Markov":[67],"Decision":[68],"Process":[69],"(ASA-MDP),":[70],"which":[71,151],"models":[72],"interaction":[74],"as":[75],"zero-sum":[77],"game":[78],"between":[79],"agent":[81,179],"adversary":[84,121,154],"attacking":[85],"both":[86,165],"states":[87],"Using":[90],"this":[91,116],"framework,":[92],"show":[94],"trained":[97,178,195],"conventionally":[98],"remain":[103],"vulnerable":[105],"perturbations.":[108],"Moreover,":[109],"identify":[111],"key":[113],"challenge":[114],"in":[115,171],"setting:":[117],"naive":[119],"mixed-type":[120],"often":[122],"fails":[123],"effectively":[125],"balance":[126],"its":[127,161],"perturbations":[128],"across":[129,164,189],"modalities":[130],"during":[131],"training,":[132],"limiting":[133],"robustness.":[136],"address":[138],"this,":[139],"propose":[141],"Proximal":[146],"Policy":[147],"Optimization":[148],"(ASA-PPO)":[149],"algorithm,":[150],"enables":[152],"learn":[156],"balanced":[158],"strategy,":[159],"distributing":[160],"attack":[162],"budget":[163],"state":[166],"action":[168],"spaces.":[169],"This,":[170],"turn,":[172],"enhances":[173],"robustness":[175],"wide":[182],"range":[183],"scenarios.":[186],"Comprehensive":[187],"experiments":[188],"diverse":[190],"environments":[191],"demonstrate":[192],"policies":[194],"with":[196],"ASA-PPO":[197],"substantially":[198],"outperform":[199],"baselines\u2014including":[200],"standard":[201],"PPO":[202],"methods\u2014under":[206],"action-only,":[207],"observation-only,":[208],"and,":[209],"notably,":[211],"mixed-attack":[212],"conditions.":[213]},"counts_by_year":[],"updated_date":"2026-03-14T06:41:57.775601","created_date":"2025-10-10T00:00:00"}
