{"id":"https://openalex.org/W4391640631","doi":"https://doi.org/10.1109/taslp.2024.3364100","title":"Interpretable Spectrum Transformation Attacks to Speaker Recognition Systems","display_name":"Interpretable Spectrum Transformation Attacks to Speaker Recognition Systems","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391640631","doi":"https://doi.org/10.1109/taslp.2024.3364100"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3364100","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3364100","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076933423","display_name":"Jiadi Yao","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiadi Yao","raw_affiliation_strings":["School of Marine Science and Technology, Northwestern Polytechnical University, Xi&#x0027;an, China","Research & Development Institute of Northwestern Polytechnical University in Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Marine Science and Technology, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Research & Development Institute of Northwestern Polytechnical University in Shenzhen, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106894210","display_name":"Hong Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Luo","raw_affiliation_strings":["China Mobile (Hangzhou) Information Technology Company Ltd., Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"China Mobile (Hangzhou) Information Technology Company Ltd., Hangzhou, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090370578","display_name":"Jun Qi","orcid":"https://orcid.org/0000-0003-0970-8462"},"institutions":[{"id":"https://openalex.org/I141568987","display_name":"Hong Kong Baptist University","ror":"https://ror.org/0145fw131","country_code":"HK","type":"education","lineage":["https://openalex.org/I141568987"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jun Qi","raw_affiliation_strings":["Department of Computer Science, Hong Kong Baptist University, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Hong Kong Baptist University, Hong Kong, China","institution_ids":["https://openalex.org/I141568987"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100450091","display_name":"Xiao-Lei Zhang","orcid":"https://orcid.org/0000-0001-7694-193X"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao-Lei Zhang","raw_affiliation_strings":["School of Marine Science and Technology, Northwestern Polytechnical University, Xi&#x0027;an, China","Research & Development Institute of Northwestern Polytechnical University in Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Marine Science and Technology, Northwestern Polytechnical University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Research & Development Institute of Northwestern Polytechnical University in Shenzhen, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5076933423"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":3.415,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.9287815,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"32","issue":null,"first_page":"1531","last_page":"1545"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7974468469619751},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7574598789215088},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.6933948993682861},{"id":"https://openalex.org/keywords/transferability","display_name":"Transferability","score":0.5201500058174133},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5161041021347046},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5130226016044617},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.503689706325531},{"id":"https://openalex.org/keywords/black-box","display_name":"Black box","score":0.4571303427219391},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.45462679862976074},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4305438697338104},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4281063675880432},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4071298837661743},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.40507590770721436},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09176364541053772}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7974468469619751},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7574598789215088},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.6933948993682861},{"id":"https://openalex.org/C61272859","wikidata":"https://www.wikidata.org/wiki/Q7834031","display_name":"Transferability","level":3,"score":0.5201500058174133},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5161041021347046},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5130226016044617},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.503689706325531},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.4571303427219391},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.45462679862976074},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4305438697338104},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4281063675880432},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4071298837661743},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.40507590770721436},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09176364541053772},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C140331021","wikidata":"https://www.wikidata.org/wiki/Q1868104","display_name":"Logit","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3364100","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3364100","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5057566376","display_name":null,"funder_award_id":"62176211","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":72,"referenced_works":["https://openalex.org/W111477576","https://openalex.org/W1494198834","https://openalex.org/W1552314771","https://openalex.org/W2041823554","https://openalex.org/W2046056978","https://openalex.org/W2058223056","https://openalex.org/W2123299109","https://openalex.org/W2165880886","https://openalex.org/W2243397390","https://openalex.org/W2535873859","https://openalex.org/W2543927648","https://openalex.org/W2588445447","https://openalex.org/W2726515241","https://openalex.org/W2746600820","https://openalex.org/W2765793020","https://openalex.org/W2774644650","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2936774411","https://openalex.org/W2962747881","https://openalex.org/W2962858109","https://openalex.org/W2963070423","https://openalex.org/W2963542245","https://openalex.org/W2963857521","https://openalex.org/W2969542116","https://openalex.org/W2969945254","https://openalex.org/W2972986505","https://openalex.org/W2996800219","https://openalex.org/W3013020904","https://openalex.org/W3015625436","https://openalex.org/W3015811740","https://openalex.org/W3017720918","https://openalex.org/W3024869864","https://openalex.org/W3034175346","https://openalex.org/W3034214559","https://openalex.org/W3035253074","https://openalex.org/W3047561893","https://openalex.org/W3054852761","https://openalex.org/W3084424094","https://openalex.org/W3095259706","https://openalex.org/W3096023981","https://openalex.org/W3096614974","https://openalex.org/W3106412272","https://openalex.org/W3128911095","https://openalex.org/W3131867595","https://openalex.org/W3137249133","https://openalex.org/W3153453329","https://openalex.org/W3160325739","https://openalex.org/W3163083600","https://openalex.org/W3163505255","https://openalex.org/W3163596559","https://openalex.org/W3176482836","https://openalex.org/W3191453585","https://openalex.org/W4214502238","https://openalex.org/W4223587462","https://openalex.org/W4285149944","https://openalex.org/W4293846201","https://openalex.org/W4297841773","https://openalex.org/W4304701424","https://openalex.org/W4312420471","https://openalex.org/W4312648479","https://openalex.org/W4315778510","https://openalex.org/W4372266914","https://openalex.org/W6640425456","https://openalex.org/W6729756640","https://openalex.org/W6739868092","https://openalex.org/W6746608116","https://openalex.org/W6750404860","https://openalex.org/W6752985256","https://openalex.org/W6766816269","https://openalex.org/W6768366551","https://openalex.org/W6838964429"],"related_works":["https://openalex.org/W4288055406","https://openalex.org/W4200630034","https://openalex.org/W3137894200","https://openalex.org/W3092178728","https://openalex.org/W4226402597","https://openalex.org/W3132910851","https://openalex.org/W4377864639","https://openalex.org/W4392340763","https://openalex.org/W4283325551","https://openalex.org/W4403006689"],"abstract_inverted_index":{"The":[0],"success":[1],"of":[2,36,78,102,111,152,168,187,196,236,241],"adversarial":[3,16,38,80,113,197],"attacks":[4,212],"on":[5,68,261],"speaker":[6,214],"recognition":[7,215],"is":[8,40],"mainly":[9],"in":[10,55,115,132,141,213],"white-box":[11,23,179],"scenarios.":[12],"When":[13],"applying":[14],"the":[15,34,37,76,79,93,100,108,112,116,133,136,142,147,153,166,193,200,217,229,234,239,242,245,249,262],"voices":[17,39,81,131,140,198],"that":[18,119,129,228],"are":[19,120,259],"generated":[20],"by":[21,199,252],"attacking":[22],"surrogate":[24,180,188],"models":[25],"to":[26,74,82,92,122],"black-box":[27,32,84],"victim":[28,85],"models,":[29],"i.e.":[30],"transfer-based":[31,211],"attacks,":[33],"transferability":[35,77],"not":[41,172],"only":[42,173],"far":[43],"from":[44],"satisfactory,":[45],"but":[46,182],"also":[47,183],"lacks":[48],"interpretable":[49,208],"basis.":[50],"To":[51,164],"address":[52],"these":[53],"issues,":[54],"this":[56],"paper,":[57],"we":[58,88,97,171,191],"propose":[59],"a":[60,83,123,177,253],"general":[61],"framework,":[62],"named":[63],"spectral":[64],"transformation":[65],"attack":[66],"based":[67],"modified":[69],"discrete":[70],"cosine":[71],"transform":[72],"(STA-MDCT),":[73],"improve":[75],"model.":[86],"Specifically,":[87],"first":[89,218],"apply":[90],"MDCT":[91],"input":[94],"voice.":[95],"Then,":[96],"slightly":[98],"modify":[99],"energy":[101],"different":[103],"frequency":[104],"bands":[105],"for":[106,210,216],"capturing":[107],"salient":[109],"regions":[110],"noise":[114],"time-frequency":[117,143],"domain":[118],"critical":[121],"successful":[124],"attack.":[125,154],"Unlike":[126],"existing":[127],"approaches":[128],"operate":[130],"time":[134],"domain,":[135,144],"proposed":[137,246],"framework":[138],"operates":[139],"which":[145,205],"improves":[146],"interpretability,":[148],"transferability,":[149],"and":[150,238],"imperceptibility":[151],"Moreover,":[155],"it":[156],"can":[157],"be":[158],"implemented":[159],"with":[160,176,184,223],"any":[161],"gradient-based":[162],"attackers.":[163],"utilize":[165],"advantage":[167],"model":[169,181],"ensembling,":[170],"implement":[174],"STA-MDCT":[175,237],"single":[178],"an":[185,207],"ensemble":[186],"models.":[189],"Finally,":[190],"visualize":[192],"saliency":[194],"maps":[195,203],"class":[201],"activation":[202],"(CAM),":[204],"offer":[206],"basis":[209],"time.":[219],"Extensive":[220],"comparison":[221,243,250],"results":[222],"six":[224],"representative":[225],"attackers":[226],"show":[227],"CAM":[230],"visualization":[231],"clearly":[232],"explains":[233],"effectiveness":[235],"weaknesses":[240],"methods;":[244],"method":[247],"outperforms":[248],"methods":[251],"large":[254],"margin.":[255],"Our":[256],"audio":[257],"samples":[258],"available":[260],"demo":[263],"website.":[264]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
