{"id":"https://openalex.org/W4406148177","doi":"https://doi.org/10.1145/3704323.3704334","title":"An End-to-End Audio Transformer with Multi-student Knowledge Distillation algorithm for Deepfake Speech Detection","display_name":"An End-to-End Audio Transformer with Multi-student Knowledge Distillation algorithm for Deepfake Speech Detection","publication_year":2024,"publication_date":"2024-10-25","ids":{"openalex":"https://openalex.org/W4406148177","doi":"https://doi.org/10.1145/3704323.3704334"},"language":"en","primary_location":{"id":"doi:10.1145/3704323.3704334","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3704323.3704334","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 13th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101350174","display_name":"Weidong An","orcid":null},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weidong An","raw_affiliation_strings":["School of Information Science and Technology, Beijing University of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-4906-5272","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040147385","display_name":"Ruwei Li","orcid":"https://orcid.org/0000-0002-7828-2242"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruwei Li","raw_affiliation_strings":["School of Information Science and Technology, Beijing University Of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7828-2242","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, Beijing University Of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112384696","display_name":"Haoyu Ge","orcid":null},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Ge","raw_affiliation_strings":["School of Information Science and Technology, Beijing University Of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-1823-6966","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, Beijing University Of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061516366","display_name":"Man Li","orcid":null},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Man Li","raw_affiliation_strings":["School of Information Science and Technology, Beijing University Of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-5237-5466","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, Beijing University Of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"last","author":{"id":null,"display_name":"Huaiyu Li","orcid":"https://orcid.org/0009-0004-9614-3715"},"institutions":[{"id":"https://openalex.org/I24201400","display_name":"Chengdu University of Information Technology","ror":"https://ror.org/01yxwrh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I24201400"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaiyu Li","raw_affiliation_strings":["School of Information Science and Technology, Chengdu university of information technology, Chengdu, Sichuan, China"],"raw_orcid":"https://orcid.org/0009-0004-9614-3715","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, Chengdu university of information technology, Chengdu, Sichuan, China","institution_ids":["https://openalex.org/I24201400"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101350174"],"corresponding_institution_ids":["https://openalex.org/I37796252"],"apc_list":null,"apc_paid":null,"fwci":0.6576,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.7034406,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"366","last_page":"371"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7166589498519897},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6476926803588867},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6064554452896118},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.492241770029068},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4802544414997101},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.326557457447052},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25476908683776855},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.16234993934631348},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1585429310798645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7166589498519897},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6476926803588867},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6064554452896118},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.492241770029068},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4802544414997101},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.326557457447052},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25476908683776855},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.16234993934631348},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1585429310798645},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3704323.3704334","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3704323.3704334","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 13th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2478006605","https://openalex.org/W2765864768","https://openalex.org/W2936802426","https://openalex.org/W3017138616","https://openalex.org/W3024920698","https://openalex.org/W3094678546","https://openalex.org/W3170179936","https://openalex.org/W3197358873","https://openalex.org/W3199131409","https://openalex.org/W3199161700","https://openalex.org/W3199956586","https://openalex.org/W3212117663","https://openalex.org/W4380353702","https://openalex.org/W4381198892","https://openalex.org/W4385822981","https://openalex.org/W6638523607","https://openalex.org/W6786280816","https://openalex.org/W6803367966"],"related_works":["https://openalex.org/W2151749779","https://openalex.org/W2051487156","https://openalex.org/W3179968364","https://openalex.org/W2073681303","https://openalex.org/W1999612375","https://openalex.org/W2938107654","https://openalex.org/W3196421258","https://openalex.org/W2053286651","https://openalex.org/W4387301579","https://openalex.org/W4404782863"],"abstract_inverted_index":{"An":[0],"increased":[1],"prevalence":[2],"of":[3,15],"fraudulent":[4],"techniques":[5],"has":[6],"revealed":[7],"the":[8,80,89,103,113,119,122,136,141,146,156,163,167,174,183,191,201,217],"limitations":[9],"in":[10,33,44,155,194],"performance":[11,172,198],"and":[12,28,69,92,98,130,186],"detection":[13,107],"speed":[14],"existing":[16],"Spoofed":[17],"Speech":[18],"Detection(SSD)":[19],"algorithms.":[20],"To":[21],"address":[22],"these":[23],"challenges,":[24],"a":[25,37,55,66,96,214],"more":[26,61],"stable":[27],"rapid":[29],"algorithm":[30,41,46,120,169,192],"is":[31,42],"proposed":[32,168,193],"this":[34,45,195],"paper.":[35],"Firstly,":[36],"novel":[38],"feature":[39,56,63],"extraction":[40,51],"introduced,":[43],"we":[47],"employing":[48],"an":[49,76],"end-to-end":[50],"frontend":[52],"combined":[53],"with":[54,173],"smoothing":[57],"mechanism":[58],"to":[59,126,139,149,200],"extract":[60],"robust":[62],"representations.":[64],"Secondly,":[65],"one":[67],"teacher":[68,81,90,137],"multi-student":[70],"knowledge":[71,100],"distillation":[72,101],"system,":[73],"guided":[74],"by":[75,135],"Audio":[77],"transformer":[78],"as":[79],"model.":[82],"This":[83],"system":[84],"comprises":[85],"two":[86],"distinct":[87],"networks:":[88],"network":[91],"student":[93],"network.":[94],"Through":[95],"one-teacher":[97],"multiple-students":[99],"structure,":[102],"model":[104,138,176],"achieves":[105,197],"faster":[106],"speeds":[108],"without":[109],"compromising":[110],"performance,":[111],"meeting":[112],"requirements":[114],"for":[115,209],"real-time":[116,218],"processing.":[117],"Finally,":[118],"utilizes":[121],"ASVspoof2021":[123,184,187],"LA":[124,185],"dataset":[125],"simulate":[127],"unknown":[128,153],"attacks":[129,154],"employs":[131],"pseudo":[132],"labels":[133],"generated":[134],"train":[140],"students":[142],"model,":[143],"thus":[144],"enhancing":[145],"system's":[147],"capability":[148],"handle":[150],"increasingly":[151],"variable":[152],"future.":[157],"Experimental":[158],"results":[159],"demonstrate":[160],"that":[161,178],"on":[162,182,213],"ASVspoof2019":[164],"evaluation":[165,189],"set":[166],"reaches":[170],"optimal":[171],"minimum":[175],"parameters":[177],"only":[179,207],"0.33M.":[180],"Moreover,":[181],"DF":[188],"sets,":[190],"paper":[196],"close":[199],"state-of-the-art":[202],"(SOTA)":[203],"algorithms":[204],"while":[205],"requires":[206],"7.64ms":[208],"single":[210],"speech":[211],"inference":[212],"CPU,":[215],"fulfilling":[216],"processing":[219],"criteria.":[220]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
