{"id":"https://openalex.org/W4385975749","doi":"https://doi.org/10.1109/taslp.2023.3306714","title":"Timbre-Reserved Adversarial Attack in Speaker Identification","display_name":"Timbre-Reserved Adversarial Attack in Speaker Identification","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4385975749","doi":"https://doi.org/10.1109/taslp.2023.3306714"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3306714","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3306714","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107808955","display_name":"Qing Wang","orcid":"https://orcid.org/0009-0008-5449-4815"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qing Wang","raw_affiliation_strings":["Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China"],"raw_orcid":"https://orcid.org/0009-0008-5449-4815","affiliations":[{"raw_affiliation_string":"Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015560758","display_name":"Jixun Yao","orcid":"https://orcid.org/0000-0002-5324-7360"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jixun Yao","raw_affiliation_strings":["Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China"],"raw_orcid":"https://orcid.org/0000-0002-5324-7360","affiliations":[{"raw_affiliation_string":"Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100425399","display_name":"Li Zhang","orcid":"https://orcid.org/0000-0001-5077-9962"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Zhang","raw_affiliation_strings":["Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China"],"raw_orcid":"https://orcid.org/0000-0001-5077-9962","affiliations":[{"raw_affiliation_string":"Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101783173","display_name":"Pengcheng Guo","orcid":"https://orcid.org/0009-0001-2388-5935"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengcheng Guo","raw_affiliation_strings":["Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China"],"raw_orcid":"https://orcid.org/0009-0001-2388-5935","affiliations":[{"raw_affiliation_string":"Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Audio, Speech and Language Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xian, Shaanxi, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100668966","display_name":"Lei Xie","orcid":"https://orcid.org/0000-0001-8234-0823"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Xie","raw_affiliation_strings":["Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China"],"raw_orcid":"https://orcid.org/0000-0001-8234-0823","affiliations":[{"raw_affiliation_string":"Audio, Speech and Langauge Processing Group (ASLP@NPU), School of Computer Science, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.979,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.80574452,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"31","issue":null,"first_page":"3848","last_page":"3858"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9850000143051147,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.8299175500869751},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7935639023780823},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7478820085525513},{"id":"https://openalex.org/keywords/spoofing-attack","display_name":"Spoofing attack","score":0.7167046666145325},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6273148655891418},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5449553728103638},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.46898016333580017},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4511527121067047},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4416981637477875},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35688430070877075},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.2828029692173004},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09442916512489319}],"concepts":[{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.8299175500869751},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7935639023780823},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7478820085525513},{"id":"https://openalex.org/C167900197","wikidata":"https://www.wikidata.org/wiki/Q11081100","display_name":"Spoofing attack","level":2,"score":0.7167046666145325},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6273148655891418},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5449553728103638},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.46898016333580017},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4511527121067047},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4416981637477875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35688430070877075},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2828029692173004},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09442916512489319},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2023.3306714","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3306714","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":83,"referenced_works":["https://openalex.org/W748732769","https://openalex.org/W1509691205","https://openalex.org/W1673923490","https://openalex.org/W1936725236","https://openalex.org/W1945616565","https://openalex.org/W2123299109","https://openalex.org/W2150962366","https://openalex.org/W2165880886","https://openalex.org/W2176804518","https://openalex.org/W2187089797","https://openalex.org/W2405506115","https://openalex.org/W2518172956","https://openalex.org/W2519091744","https://openalex.org/W2584032004","https://openalex.org/W2588445447","https://openalex.org/W2745896134","https://openalex.org/W2782403400","https://openalex.org/W2782892391","https://openalex.org/W2890964092","https://openalex.org/W2902070858","https://openalex.org/W2923292931","https://openalex.org/W2936802426","https://openalex.org/W2946200149","https://openalex.org/W2962747881","https://openalex.org/W2962788625","https://openalex.org/W2963058500","https://openalex.org/W2963077926","https://openalex.org/W2963242190","https://openalex.org/W2963539064","https://openalex.org/W2963542245","https://openalex.org/W2963609956","https://openalex.org/W2963857521","https://openalex.org/W2964243274","https://openalex.org/W2964301649","https://openalex.org/W2965116050","https://openalex.org/W2967606780","https://openalex.org/W2969985801","https://openalex.org/W2972394484","https://openalex.org/W2972908178","https://openalex.org/W3007679772","https://openalex.org/W3015811740","https://openalex.org/W3015958938","https://openalex.org/W3024869864","https://openalex.org/W3033411150","https://openalex.org/W3054852761","https://openalex.org/W3084424094","https://openalex.org/W3092028330","https://openalex.org/W3094002217","https://openalex.org/W3095259706","https://openalex.org/W3095570773","https://openalex.org/W3096086473","https://openalex.org/W3098557217","https://openalex.org/W3131867595","https://openalex.org/W3153453329","https://openalex.org/W3155956353","https://openalex.org/W3161627112","https://openalex.org/W3162517041","https://openalex.org/W3168719651","https://openalex.org/W3169905056","https://openalex.org/W3197478142","https://openalex.org/W3198123200","https://openalex.org/W3200167423","https://openalex.org/W3201506562","https://openalex.org/W3201773091","https://openalex.org/W4221138880","https://openalex.org/W4224923642","https://openalex.org/W4249468441","https://openalex.org/W4283015658","https://openalex.org/W4297841605","https://openalex.org/W6637162671","https://openalex.org/W6640425456","https://openalex.org/W6732429163","https://openalex.org/W6736996214","https://openalex.org/W6742386808","https://openalex.org/W6746882984","https://openalex.org/W6747317711","https://openalex.org/W6760326341","https://openalex.org/W6763832098","https://openalex.org/W6778823374","https://openalex.org/W6783867762","https://openalex.org/W6785090365","https://openalex.org/W6796464841","https://openalex.org/W6800767084"],"related_works":["https://openalex.org/W1496222301","https://openalex.org/W3207760230","https://openalex.org/W1590307681","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2502115930","https://openalex.org/W2358353312","https://openalex.org/W2353836703","https://openalex.org/W2954943288","https://openalex.org/W2726823740"],"abstract_inverted_index":{"As":[0,80],"a":[1,6,39,52,93,140,195],"type":[2],"of":[3,16,25,64,125,134,164,222,286,325],"biometric":[4],"identification,":[5],"speaker":[7,103,146,178,196,322],"identification":[8],"(SID)":[9],"system":[10,36,74,88],"is":[11,174,192,226,233,294],"confronted":[12],"with":[13,263],"various":[14],"kinds":[15],"attacks.":[17],"The":[18,204],"spoofing":[19,49],"attacks":[20,32],"typically":[21],"imitate":[22],"the":[23,26,30,34,48,56,62,65,72,76,82,86,99,107,116,123,126,132,135,145,150,160,165,171,176,182,187,200,211,216,220,223,227,238,244,257,264,275,284,302,306,317,326],"timbre":[24,54,104,133],"target":[27,136,177,321],"speakers,":[28],"while":[29],"adversarial":[31,41,83,142,152,157,172,183,205,229,272,299,312],"confuse":[33],"SID":[35,66,73,87,119,127,239],"by":[37,154,194,290],"adding":[38,155,298],"well-designed":[40],"perturbation":[42,184,300],"to":[43,92,114,180,186,209,214,274,301],"an":[44,156,271],"arbitrary":[45],"speech.":[46,277],"Although":[47],"attack":[50,109,117,143,258,276],"copies":[51],"similar":[53],"as":[55],"victim,":[57],"it":[58,96],"does":[59],"not":[60,70,120],"exploit":[61],"vulnerability":[63,124],"model":[67,128,189,202,213,225,267],"and":[68,191,235,279,320],"may":[69],"make":[71,115],"give":[75],"attacker's":[77],"desired":[78],"decision.":[79],"for":[81,106],"attack,":[84],"despite":[85],"can":[89,207,236],"be":[90],"led":[91],"designated":[94],"decision,":[95],"cannot":[97],"meet":[98,316],"specified":[100,318],"text":[101,319],"or":[102],"requirements":[105,324],"specific":[108],"scenarios.":[110],"In":[111],"this":[112],"study,":[113],"in":[118,144,199],"only":[121],"leverage":[122],"but":[129],"also":[130,315],"reserve":[131],"speaker,":[137],"we":[138],"propose":[139],"timbre-reserved":[141,151,234,323],"identification.":[147],"We":[148],"generate":[149,215],"audios":[153,314],"constraint":[158,173,206],"during":[159],"different":[161],"training":[162],"stages":[163],"voice":[166],"conversion":[167],"(VC)":[168],"model.":[169],"Specifically,":[170],"using":[175],"label":[179],"optimize":[181],"added":[185],"VC":[188,201,212,224,266],"representations":[190],"implemented":[193],"classifier":[197],"joining":[198],"training.":[203],"help":[208],"control":[210],"speaker-wised":[217],"audio.":[218,304],"Eventually,":[219],"inference":[221],"ideal":[228],"fake":[230,287,313],"audio,":[231],"which":[232],"fool":[237],"system.":[240],"Experimental":[241],"results":[242],"on":[243],"Audio":[245],"deepfake":[246],"detection":[247],"(ADD)":[248],"challenge":[249],"dataset":[250],"indicate":[251],"that":[252,283,309],"our":[253,291,310],"proposed":[254,292],"method":[255,293],"improves":[256],"success":[259],"rate":[260],"significantly":[261],"compare":[262],"vanilla":[265],"without":[268],"additionally":[269],"introducing":[270],"noise":[273],"Objective":[278],"subjective":[280],"evaluations":[281],"illustrate":[282],"quality":[285],"audio":[288],"generated":[289,311],"better":[295],"than":[296],"directly":[297],"VC-generated":[303],"Furthermore,":[305],"analysis":[307],"shows":[308],"attacker.":[327]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
