{"id":"https://openalex.org/W3012156918","doi":"https://doi.org/10.1109/lcsys.2020.2979572","title":"On-Off Adversarially Robust Q-Learning","display_name":"On-Off Adversarially Robust Q-Learning","publication_year":2020,"publication_date":"2020-03-10","ids":{"openalex":"https://openalex.org/W3012156918","doi":"https://doi.org/10.1109/lcsys.2020.2979572","mag":"3012156918"},"language":"en","primary_location":{"id":"doi:10.1109/lcsys.2020.2979572","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lcsys.2020.2979572","pdf_url":null,"source":{"id":"https://openalex.org/S4306422535","display_name":"IEEE Control Systems Letters","issn_l":"2475-1456","issn":["2475-1456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Control Systems Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043865100","display_name":"Prachi Pratyusha Sahoo","orcid":"https://orcid.org/0000-0002-2768-601X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prachi Pratyusha Sahoo","raw_affiliation_strings":["Woodruff School for Mechanical Engineering, Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-2768-601X","affiliations":[{"raw_affiliation_string":"Woodruff School for Mechanical Engineering, Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040301558","display_name":"Kyriakos G. Vamvoudakis","orcid":"https://orcid.org/0000-0003-1978-4848"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyriakos G. Vamvoudakis","raw_affiliation_strings":["Daniel H. Guggenheim School of Aerospace Engineering, Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0003-1978-4848","affiliations":[{"raw_affiliation_string":"Daniel H. Guggenheim School of Aerospace Engineering, Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":0.6772,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.75907927,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"4","issue":"3","first_page":"749","last_page":"754"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9635999798774719,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9560999870300293,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.8441022038459778},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.8198142051696777},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6335320472717285},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5050106644630432},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.5026288032531738},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.463647723197937},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.4613603949546814},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.35031092166900635},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32811179757118225},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2110786736011505}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.8441022038459778},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.8198142051696777},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6335320472717285},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5050106644630432},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5026288032531738},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.463647723197937},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.4613603949546814},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.35031092166900635},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32811179757118225},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2110786736011505},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lcsys.2020.2979572","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lcsys.2020.2979572","pdf_url":null,"source":{"id":"https://openalex.org/S4306422535","display_name":"IEEE Control Systems Letters","issn_l":"2475-1456","issn":["2475-1456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Control Systems Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5224980972","display_name":null,"funder_award_id":"S&AS-1849264","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"},{"id":"https://openalex.org/G6046910334","display_name":null,"funder_award_id":"CPS-1851588","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"},{"id":"https://openalex.org/G630408074","display_name":null,"funder_award_id":"W911NF-19-1-0270","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320335353","display_name":"National Science Foundation of Sri Lanka","ror":"https://ror.org/010xaa060"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1552094772","https://openalex.org/W1673923490","https://openalex.org/W1742307920","https://openalex.org/W1901616594","https://openalex.org/W1945616565","https://openalex.org/W1980026010","https://openalex.org/W1995875735","https://openalex.org/W1999588868","https://openalex.org/W2012173880","https://openalex.org/W2013301788","https://openalex.org/W2037031363","https://openalex.org/W2064111006","https://openalex.org/W2121863487","https://openalex.org/W2152161277","https://openalex.org/W2187013920","https://openalex.org/W2197836422","https://openalex.org/W2323065399","https://openalex.org/W2494110282","https://openalex.org/W2495451601","https://openalex.org/W2564717627","https://openalex.org/W2566006881","https://openalex.org/W2604394466","https://openalex.org/W2772589676","https://openalex.org/W2773691349","https://openalex.org/W2941205169","https://openalex.org/W2943852008","https://openalex.org/W2963098487","https://openalex.org/W2963178695","https://openalex.org/W2963207607","https://openalex.org/W2964153729","https://openalex.org/W2971113011","https://openalex.org/W4214717370","https://openalex.org/W4247013706","https://openalex.org/W4250516408","https://openalex.org/W4250589301","https://openalex.org/W6686674283","https://openalex.org/W6704139216"],"related_works":["https://openalex.org/W2502115930","https://openalex.org/W4246396837","https://openalex.org/W2482350142","https://openalex.org/W3176240006","https://openalex.org/W3126451824","https://openalex.org/W1561927205","https://openalex.org/W3191453585","https://openalex.org/W4297672492","https://openalex.org/W4288019534","https://openalex.org/W4310988119"],"abstract_inverted_index":{"This":[0],"letter,":[1],"presents":[2],"an":[3,22],"\u201con-off\u201d":[4,33],"learning-based":[5],"scheme":[6],"to":[7,28,35,68,74,82],"expand":[8],"the":[9,39,52,55,70],"attacker's":[10],"surface,":[11],"namely":[12],"a":[13,80],"moving":[14],"target":[15],"defense":[16],"(MTD)":[17],"framework,":[18],"while":[19],"optimally":[20],"stabilizing":[21],"unknown":[23],"system.":[24],"We":[25,46],"leverage":[26],"Q-learning":[27],"learn":[29],"optimal":[30],"strategies":[31],"with":[32],"actuation":[34],"promote":[36],"unpredictability":[37],"of":[38,54],"learned":[40],"behavior":[41],"against":[42],"physically":[43],"plausible":[44],"attacks.":[45],"provide":[47],"rigorous,":[48],"theoretical":[49],"guarantees":[50],"on":[51,79],"stability":[53],"equilibrium":[56],"point":[57],"even":[58],"when":[59],"switching.":[60],"Finally,":[61],"we":[62],"develop":[63],"two":[64],"adversarial":[65],"threat":[66],"models":[67],"evaluate":[69],"learning":[71],"agent's":[72],"ability":[73],"generate":[75],"robust":[76],"policies":[77],"based":[78],"distance":[81],"uncontrollability.":[83]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
