{"id":"https://openalex.org/W4400913287","doi":"https://doi.org/10.1145/3665451.3665532","title":"Towards Evaluating the Robustness of Automatic Speech Recognition Systems via Audio Style Transfer","display_name":"Towards Evaluating the Robustness of Automatic Speech Recognition Systems via Audio Style Transfer","publication_year":2024,"publication_date":"2024-07-02","ids":{"openalex":"https://openalex.org/W4400913287","doi":"https://doi.org/10.1145/3665451.3665532"},"language":"en","primary_location":{"id":"doi:10.1145/3665451.3665532","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3665451.3665532","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd ACM Workshop on Secure and Trustworthy Deep Learning Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102666389","display_name":"Weifei Jin","orcid":"https://orcid.org/0009-0002-8209-2713"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weifei Jin","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0009-0002-8209-2713","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101580401","display_name":"Yuxin Cao","orcid":"https://orcid.org/0000-0002-0584-7257"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Cao","raw_affiliation_strings":["Tsinghua University, China"],"raw_orcid":"https://orcid.org/0009-0002-5766-0846","affiliations":[{"raw_affiliation_string":"Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062286286","display_name":"Junjie Su","orcid":"https://orcid.org/0009-0007-4262-6131"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjie Su","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0009-0007-4262-6131","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011642971","display_name":"Qi Shen","orcid":"https://orcid.org/0009-0008-5595-9282"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Shen","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0009-0008-5595-9282","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103054783","display_name":"Kai Ye","orcid":"https://orcid.org/0009-0004-2177-5531"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Ye","raw_affiliation_strings":["Tsinghua University, China"],"raw_orcid":"https://orcid.org/0009-0004-2177-5531","affiliations":[{"raw_affiliation_string":"Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081805488","display_name":"Derui Wang","orcid":"https://orcid.org/0000-0003-1388-7715"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"government","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Derui Wang","raw_affiliation_strings":["CSIRO's Data61, Australia"],"raw_orcid":"https://orcid.org/0000-0003-1388-7715","affiliations":[{"raw_affiliation_string":"CSIRO's Data61, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042001449","display_name":"Jie Hao","orcid":"https://orcid.org/0000-0003-2032-665X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Hao","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, China"],"raw_orcid":"https://orcid.org/0000-0003-2032-665X","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101492724","display_name":"Ziyao Liu","orcid":"https://orcid.org/0000-0003-4060-0839"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ziyao Liu","raw_affiliation_strings":["Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-4060-0839","affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5102666389"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.9934,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.79625542,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"47","last_page":"55"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7693946361541748},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7459000945091248},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.707545816898346},{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.41676050424575806},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35820508003234863},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3413125276565552}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7693946361541748},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7459000945091248},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.707545816898346},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.41676050424575806},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35820508003234863},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3413125276565552},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3665451.3665532","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3665451.3665532","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd ACM Workshop on Secure and Trustworthy Deep Learning Systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7218046545","display_name":null,"funder_award_id":"61801049","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2127141656","https://openalex.org/W2156279557","https://openalex.org/W2193413348","https://openalex.org/W2475287302","https://openalex.org/W2766465839","https://openalex.org/W2783113218","https://openalex.org/W2803853585","https://openalex.org/W2892852825","https://openalex.org/W2898435086","https://openalex.org/W2923292931","https://openalex.org/W2945478979","https://openalex.org/W2954420970","https://openalex.org/W2963058500","https://openalex.org/W2963077926","https://openalex.org/W2963857521","https://openalex.org/W2964539095","https://openalex.org/W2997395473","https://openalex.org/W2998572311","https://openalex.org/W3017720918","https://openalex.org/W3020570669","https://openalex.org/W3082827059","https://openalex.org/W3093004624","https://openalex.org/W3109668151","https://openalex.org/W3135197931","https://openalex.org/W3138740729","https://openalex.org/W3155956353","https://openalex.org/W3198104520","https://openalex.org/W3205631867","https://openalex.org/W3207651366","https://openalex.org/W4220922534","https://openalex.org/W4225939199","https://openalex.org/W4290943474","https://openalex.org/W4308642081","https://openalex.org/W4312322018","https://openalex.org/W4367859498","https://openalex.org/W4385080387","https://openalex.org/W4386362951","https://openalex.org/W6751569023","https://openalex.org/W6810132989"],"related_works":["https://openalex.org/W2356229341","https://openalex.org/W2349768204","https://openalex.org/W2770593030","https://openalex.org/W3154990682","https://openalex.org/W2560201613","https://openalex.org/W2171975302","https://openalex.org/W4313326281","https://openalex.org/W2022352247","https://openalex.org/W574867512","https://openalex.org/W3204019825"],"abstract_inverted_index":{"In":[0,110],"light":[1],"of":[2,6,28,44,52,71,107,130,180],"the":[3,26,42,50,103,128,170],"widespread":[4],"application":[5],"Automatic":[7],"Speech":[8],"Recognition":[9],"(ASR)":[10],"systems,":[11,47],"their":[12],"security":[13],"concerns":[14],"have":[15,34],"received":[16],"much":[17],"more":[18],"attention":[19],"than":[20],"ever":[21],"before,":[22],"primarily":[23],"due":[24,188],"to":[25,84,158,189],"susceptibility":[27],"Deep":[29],"Neural":[30],"Networks.":[31],"Previous":[32],"studies":[33],"illustrated":[35],"that":[36,165],"surreptitiously":[37],"crafting":[38],"adversarial":[39,86,140],"perturbations":[40,62],"enables":[41],"manipulation":[43],"speech":[45],"recognition":[46],"resulting":[48],"in":[49,142,182],"production":[51],"malicious":[53],"commands.":[54],"These":[55],"attack":[56,116,141],"methods":[57],"mostly":[58],"require":[59],"adding":[60],"noise":[61],"under":[63],"\u2113p":[64],"norm":[65],"constraints,":[66],"inevitably":[67],"leaving":[68],"behind":[69],"artifacts":[70],"manual":[72],"modifications.":[73],"Recent":[74],"research":[75],"has":[76],"alleviated":[77],"this":[78,111],"limitation":[79],"by":[80],"manipulating":[81],"style":[82,95,123,137],"vectors":[83],"synthesize":[85],"examples":[87],"based":[88,97,120],"on":[89,98,117,121],"Text-to-Speech":[90],"(TTS)":[91],"synthesis":[92],"audio.":[93],"However,":[94],"modifications":[96],"optimization":[99],"objectives":[100],"significantly":[101],"reduce":[102],"controllability":[104],"and":[105,139,175],"editability":[106],"audio":[108,160],"styles.":[109],"paper,":[112],"we":[113,150],"propose":[114,151],"an":[115,148,152],"ASR":[118],"systems":[119],"user-customized":[122,173],"transfer.":[124],"We":[125],"first":[126],"test":[127],"effect":[129],"Style":[131,154],"Transfer":[132],"Attack":[133,156],"(STA)":[134],"which":[135],"combines":[136],"transfer":[138],"sequential":[143],"order.":[144],"And":[145],"then,":[146],"as":[147],"improvement,":[149],"iterative":[153],"Code":[155],"(SCA)":[157],"maintain":[159],"quality.":[161],"Experimental":[162],"results":[163],"show":[164],"our":[166,190],"method":[167],"can":[168],"meet":[169],"need":[171],"for":[172],"styles":[174],"achieve":[176],"a":[177],"success":[178],"rate":[179],"82%":[181],"attacks,":[183],"while":[184],"keeping":[185],"sound":[186],"naturalness":[187],"user":[191],"study.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
