{"id":"https://openalex.org/W3164878287","doi":"https://doi.org/10.1145/3433210.3453090","title":"Stealing Deep Reinforcement Learning Models for Fun and Profit","display_name":"Stealing Deep Reinforcement Learning Models for Fun and Profit","publication_year":2021,"publication_date":"2021-05-24","ids":{"openalex":"https://openalex.org/W3164878287","doi":"https://doi.org/10.1145/3433210.3453090","mag":"3164878287"},"language":"en","primary_location":{"id":"doi:10.1145/3433210.3453090","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3433210.3453090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 ACM Asia Conference on Computer and Communications Security","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090278564","display_name":"Kangjie Chen","orcid":"https://orcid.org/0000-0001-5099-7054"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Kangjie Chen","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073264981","display_name":"Shangwei Guo","orcid":"https://orcid.org/0000-0002-6443-5308"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shangwei Guo","raw_affiliation_strings":["Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101591101","display_name":"Tianwei Zhang","orcid":"https://orcid.org/0000-0001-6595-6650"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tianwei Zhang","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084396416","display_name":"Xiaofei Xie","orcid":"https://orcid.org/0000-0002-1288-6502"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xiaofei Xie","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100355692","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0001-7300-9215"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yang Liu","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5090278564"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":3.3992,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.93522742,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"307","last_page":"319"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12122","display_name":"Physical Unclonable Functions (PUFs) and Hardware Security","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.811112642288208},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7569142580032349},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6757819652557373},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5974565148353577},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5344082117080688},{"id":"https://openalex.org/keywords/adversary","display_name":"Adversary","score":0.5067744851112366},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.46402624249458313},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4589861333370209},{"id":"https://openalex.org/keywords/learning-classifier-system","display_name":"Learning classifier system","score":0.4501872956752777},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.4442242980003357},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4362753629684448},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.17122933268547058},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14204490184783936}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.811112642288208},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7569142580032349},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6757819652557373},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5974565148353577},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5344082117080688},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.5067744851112366},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.46402624249458313},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4589861333370209},{"id":"https://openalex.org/C199190896","wikidata":"https://www.wikidata.org/wiki/Q3509276","display_name":"Learning classifier system","level":3,"score":0.4501872956752777},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.4442242980003357},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4362753629684448},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.17122933268547058},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14204490184783936},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3433210.3453090","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3433210.3453090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 ACM Asia Conference on Computer and Communications Security","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8199999928474426,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1949804828","https://openalex.org/W2031571562","https://openalex.org/W2064675550","https://openalex.org/W2119717200","https://openalex.org/W2146950091","https://openalex.org/W2180612164","https://openalex.org/W2282821441","https://openalex.org/W2434014514","https://openalex.org/W2525579820","https://openalex.org/W2579318729","https://openalex.org/W2604382266","https://openalex.org/W2749928749","https://openalex.org/W2789304371","https://openalex.org/W2806082141","https://openalex.org/W2808195004","https://openalex.org/W2809523935","https://openalex.org/W2897830718","https://openalex.org/W2921058674","https://openalex.org/W2957905354","https://openalex.org/W2962755762","https://openalex.org/W2962847335","https://openalex.org/W2962887844","https://openalex.org/W2963303354","https://openalex.org/W2963465081","https://openalex.org/W2963560987","https://openalex.org/W2963844355","https://openalex.org/W2963857521","https://openalex.org/W2969695741","https://openalex.org/W2989885118","https://openalex.org/W2997293639","https://openalex.org/W3000499753","https://openalex.org/W3007318395","https://openalex.org/W3035200689","https://openalex.org/W3036286896","https://openalex.org/W3084432478","https://openalex.org/W3091388282","https://openalex.org/W3102836279","https://openalex.org/W3103932910","https://openalex.org/W4299301436","https://openalex.org/W6604372272","https://openalex.org/W6748645729"],"related_works":["https://openalex.org/W4320018150","https://openalex.org/W4239582170","https://openalex.org/W2918664383","https://openalex.org/W106056076","https://openalex.org/W4320855730","https://openalex.org/W2135200719","https://openalex.org/W4313443006","https://openalex.org/W2945374968","https://openalex.org/W4293777179","https://openalex.org/W4385452045"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"the":[3,30,51,73,84,115,120,140,143,156,181,206],"first":[4,109],"model":[5,24,88,124,141,174],"extraction":[6,33,89,175],"attack":[7,176],"against":[8,35],"Deep":[9,37],"Reinforcement":[10],"Learning":[11,38],"(DRL),":[12],"which":[13],"enables":[14],"an":[15],"external":[16],"adversary":[17],"to":[18,50,56,71,92,98,113,138,170,205],"precisely":[19],"recover":[20,155],"a":[21,68,95,111,202],"black-box":[22,122],"DRL":[23,57,87,123,157,190,213],"only":[25,125],"from":[26,142],"its":[27,128],"interaction":[28],"with":[29,159],"environment.":[31],"Model":[32],"attacks":[34],"supervised":[36],"models":[39,158,191],"have":[40],"been":[41],"widely":[42],"studied.":[43],"However,":[44],"those":[45],"techniques":[46,137],"cannot":[47],"be":[48],"applied":[49],"reinforcement":[52],"learning":[53,136],"scenario":[54],"due":[55],"models'":[58],"high":[59,160],"complexity,":[60],"stochasticity":[61],"and":[62,131,162,187,209],"limited":[63],"observable":[64],"information.":[65],"We":[66,164],"propose":[67],"novel":[69],"methodology":[70,108,152],"overcome":[72],"above":[74],"challenges.":[75],"The":[76],"key":[77],"insight":[78],"of":[79,86,119,184,212],"our":[80,107,151,173],"approach":[81],"is":[82,90],"that":[83,150,172],"process":[85],"equivalent":[91],"imitation":[93,135],"learning,":[94],"well-established":[96],"solution":[97],"learn":[99],"sequential":[100],"decision-making":[101],"policies.":[102],"Based":[103],"on":[104,127],"this":[105],"observation,":[106],"builds":[110],"classifier":[112],"reveal":[114],"training":[116],"algorithm":[117,145],"family":[118],"targeted":[121],"based":[126],"predicted":[129],"actions,":[130],"then":[132],"leverages":[133],"state-of-the-art":[134],"replicate":[139],"identified":[144],"family.":[146],"Experimental":[147],"results":[148],"indicate":[149],"can":[153,177],"effectively":[154],"fidelity":[161],"accuracy.":[163],"also":[165],"demonstrate":[166],"two":[167],"use":[168],"cases":[169],"show":[171],"(1)":[178],"significantly":[179],"improve":[180],"success":[182],"rate":[183],"adversarial":[185],"attacks,":[186],"(2)":[188],"steal":[189],"stealthily":[192],"even":[193],"they":[194],"are":[195],"protected":[196],"by":[197],"DNN":[198],"watermarks.":[199],"These":[200],"pose":[201],"severe":[203],"threat":[204],"intellectual":[207],"property":[208],"privacy":[210],"protection":[211],"applications.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
