{"id":"https://openalex.org/W3091896612","doi":"https://doi.org/10.1145/3320269.3384733","title":"SirenAttack: Generating Adversarial Audio for End-to-End Acoustic Systems","display_name":"SirenAttack: Generating Adversarial Audio for End-to-End Acoustic Systems","publication_year":2020,"publication_date":"2020-10-05","ids":{"openalex":"https://openalex.org/W3091896612","doi":"https://doi.org/10.1145/3320269.3384733","mag":"3091896612"},"language":"en","primary_location":{"id":"doi:10.1145/3320269.3384733","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3320269.3384733","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th ACM Asia Conference on Computer and Communications Security","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103091450","display_name":"Tianyu Du","orcid":"https://orcid.org/0000-0003-1141-157X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianyu Du","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058611515","display_name":"Shouling Ji","orcid":"https://orcid.org/0000-0003-4268-372X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shouling Ji","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100428951","display_name":"Jinfeng Li","orcid":"https://orcid.org/0000-0002-2559-1375"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinfeng Li","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054310469","display_name":"Qinchen Gu","orcid":"https://orcid.org/0000-0001-5678-5212"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qinchen Gu","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100428026","display_name":"Ting Wang","orcid":"https://orcid.org/0000-0003-4927-5833"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ting Wang","raw_affiliation_strings":["Pennsylvania State University, State College, PA, USA"],"affiliations":[{"raw_affiliation_string":"Pennsylvania State University, State College, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033073212","display_name":"Raheem Beyah","orcid":"https://orcid.org/0000-0002-9188-3464"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Raheem Beyah","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103091450"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":11.1068,"has_fulltext":false,"cited_by_count":117,"citation_normalized_percentile":{"value":0.99074798,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"357","last_page":"369"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9783999919891357,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7901164293289185},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7608184814453125},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5806007385253906},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5510035157203674},{"id":"https://openalex.org/keywords/upsampling","display_name":"Upsampling","score":0.5450551509857178},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5265196561813354},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4092302620410919}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7901164293289185},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7608184814453125},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5806007385253906},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5510035157203674},{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.5450551509857178},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5265196561813354},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4092302620410919},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3320269.3384733","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3320269.3384733","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th ACM Asia Conference on Computer and Communications Security","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.699999988079071,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W569478347","https://openalex.org/W1932198206","https://openalex.org/W2019482303","https://openalex.org/W2022217943","https://openalex.org/W2038484192","https://openalex.org/W2052666245","https://openalex.org/W2103279261","https://openalex.org/W2109364787","https://openalex.org/W2113278353","https://openalex.org/W2127141656","https://openalex.org/W2194775991","https://openalex.org/W2294068577","https://openalex.org/W2481718677","https://openalex.org/W2526050071","https://openalex.org/W2527729766","https://openalex.org/W2543927648","https://openalex.org/W2549139847","https://openalex.org/W2554096094","https://openalex.org/W2575585029","https://openalex.org/W2593116425","https://openalex.org/W2593390416","https://openalex.org/W2751902866","https://openalex.org/W2760938034","https://openalex.org/W2767951891","https://openalex.org/W2770312844","https://openalex.org/W2791616807","https://openalex.org/W2897865027","https://openalex.org/W2902543210","https://openalex.org/W2962826786","https://openalex.org/W2962835968","https://openalex.org/W2962904371","https://openalex.org/W2962910554","https://openalex.org/W2963859254","https://openalex.org/W2964137095","https://openalex.org/W2964166828","https://openalex.org/W2964301649","https://openalex.org/W2975763460","https://openalex.org/W3022179901","https://openalex.org/W3024608270","https://openalex.org/W3105332166","https://openalex.org/W4300824008","https://openalex.org/W6640620246"],"related_works":["https://openalex.org/W2062399876","https://openalex.org/W2502115930","https://openalex.org/W2607795551","https://openalex.org/W2482350142","https://openalex.org/W4246396837","https://openalex.org/W3126451824","https://openalex.org/W3155117723","https://openalex.org/W1991429770","https://openalex.org/W1983892167","https://openalex.org/W2281134365"],"abstract_inverted_index":{"Despite":[0],"their":[1,104],"immense":[2],"popularity,":[3],"deep":[4,119],"learning-based":[5,120],"acoustic":[6,62,89,121],"systems":[7,20,63,122],"are":[8,164],"inherently":[9],"vulnerable":[10],"to":[11,21,34,56,76,98,107,189,204],"adversarial":[12,36,78,100,162,194],"attacks,":[13,41,192],"wherein":[14],"maliciously":[15],"crafted":[16],"audios":[17,79,101,163],"trigger":[18],"target":[19,88],"misbehave.":[22],"In":[23],"this":[24],"paper,":[25],"we":[26],"present":[27],"SirenAttack,":[28],"a":[29,45,58,115],"new":[30],"class":[31],"of":[32,47,60,117,141],"attacks":[33],"generate":[35,77,99],"audios.":[37],"Compared":[38],"with":[39,44,133],"existing":[40],"SirenAttack":[42,113],"highlights":[43],"set":[46,116],"significant":[48],"features:":[49],"(i)":[50],"versatile":[51],"--":[52,72,94],"it":[53,73,95,145],"is":[54,74,96],"able":[55,75,97],"deceive":[57],"range":[59],"end-to-end":[61],"under":[64],"both":[65],"white-box":[66],"and":[67,91,129,139,179,198],"black-box":[68],"settings;":[69],"(ii)":[70],"effective":[71],"that":[80],"can":[81],"be":[82],"recognized":[83],"as":[84],"specific":[85],"phrases":[86],"by":[87,167],"systems;":[90],"(iii)":[92],"stealthy":[93],"indistinguishable":[102],"from":[103],"benign":[105],"counterparts":[106],"human":[108],"perception.":[109],"We":[110,182],"empirically":[111],"evaluate":[112,184],"on":[114,151],"state-of-the-art":[118],"(including":[123],"speech":[124],"command":[125],"recognition,":[126],"speaker":[127],"recognition":[128],"sound":[130],"event":[131],"classification),":[132],"results":[134],"showing":[135],"the":[136,152,156,160],"versatility,":[137],"effectiveness,":[138],"stealthiness":[140],"SirenAttack.":[142],"For":[143],"instance,":[144],"achieves":[146],"99.45%":[147],"attack":[148],"success":[149],"rate":[150],"IEMOCAP":[153],"dataset":[154],"against":[155],"ResNet18":[157],"model,":[158],"while":[159],"generated":[161],"also":[165],"misinterpreted":[166],"multiple":[168],"popular":[169],"ASR":[170],"platforms,":[171],"including":[172,193],"Google":[173],"Cloud":[174],"Speech,":[175],"Microsoft":[176],"Bing":[177],"Voice,":[178],"IBM":[180],"Speech-to-Text.":[181],"further":[183,208],"three":[185],"potential":[186],"defense":[187],"methods":[188],"mitigate":[190],"such":[191],"training,":[195],"audio":[196],"downsampling,":[197],"moving":[199],"average":[200],"filtering,":[201],"which":[202],"leads":[203],"promising":[205],"directions":[206],"for":[207],"research.":[209]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":26},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":17},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":5}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
