{"id":"https://openalex.org/W4416252351","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228261","title":"Wav2DF-TSL: Two-stage Learning with Efficient Pre-training and Hierarchical Experts Fusion for Robust Audio Deepfake Detection","display_name":"Wav2DF-TSL: Two-stage Learning with Efficient Pre-training and Hierarchical Experts Fusion for Robust Audio Deepfake Detection","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252351","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228261"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228261","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067270756","display_name":"Yunqi Hao","orcid":"https://orcid.org/0000-0002-6838-1511"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yunqi Hao","raw_affiliation_strings":["Xinjiang University,School of Computer Science and Technology,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"Xinjiang University,School of Computer Science and Technology,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101732742","display_name":"Yi\u2010Hao Chen","orcid":"https://orcid.org/0000-0002-9977-7180"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yihao Chen","raw_affiliation_strings":["Hefei iFly Digital Technology Co. Ltd.,Hefei,China"],"affiliations":[{"raw_affiliation_string":"Hefei iFly Digital Technology Co. Ltd.,Hefei,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100413867","display_name":"Minqiang Xu","orcid":"https://orcid.org/0000-0003-3625-1736"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minqiang Xu","raw_affiliation_strings":["Hefei iFly Digital Technology Co. Ltd.,Hefei,China"],"affiliations":[{"raw_affiliation_string":"Hefei iFly Digital Technology Co. Ltd.,Hefei,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074900997","display_name":"Jianbo Zhan","orcid":"https://orcid.org/0000-0002-5885-1970"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianbo Zhan","raw_affiliation_strings":["Hefei iFly Digital Technology Co. Ltd.,Hefei,China"],"affiliations":[{"raw_affiliation_string":"Hefei iFly Digital Technology Co. Ltd.,Hefei,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100317921","display_name":"Liang He","orcid":"https://orcid.org/0000-0003-4826-629X"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang He","raw_affiliation_strings":["Xinjiang University,School of Computer Science and Technology,Urumqi,China"],"affiliations":[{"raw_affiliation_string":"Xinjiang University,School of Computer Science and Technology,Urumqi,China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101997714","display_name":"Lei Fang","orcid":"https://orcid.org/0000-0001-8573-860X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei Fang","raw_affiliation_strings":["Hefei iFly Digital Technology Co. Ltd.,Hefei,China"],"affiliations":[{"raw_affiliation_string":"Hefei iFly Digital Technology Co. Ltd.,Hefei,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035297098","display_name":"Sheng Fang","orcid":"https://orcid.org/0000-0002-7201-2540"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sian Fang","raw_affiliation_strings":["Hefei iFly Digital Technology Co. Ltd.,Hefei,China"],"affiliations":[{"raw_affiliation_string":"Hefei iFly Digital Technology Co. Ltd.,Hefei,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037906509","display_name":"Lin Liu","orcid":"https://orcid.org/0000-0003-4173-7650"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin Liu","raw_affiliation_strings":["Hefei iFly Digital Technology Co. Ltd.,Hefei,China"],"affiliations":[{"raw_affiliation_string":"Hefei iFly Digital Technology Co. Ltd.,Hefei,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5067270756"],"corresponding_institution_ids":["https://openalex.org/I96908189"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.45620112,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.3098999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.3098999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.15209999680519104,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.09070000052452087,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spoofing-attack","display_name":"Spoofing attack","score":0.6877999901771545},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6152999997138977},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5989000201225281},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.5942000150680542},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.5809000134468079},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4731999933719635},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4521999955177307},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.4352000057697296}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8073999881744385},{"id":"https://openalex.org/C167900197","wikidata":"https://www.wikidata.org/wiki/Q11081100","display_name":"Spoofing attack","level":2,"score":0.6877999901771545},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6510000228881836},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6152999997138977},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5989000201225281},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.5942000150680542},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.5809000134468079},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5748999714851379},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4731999933719635},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4521999955177307},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.4352000057697296},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.39649999141693115},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.37619999051094055},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.350600004196167},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.34459999203681946},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3287000060081482},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.31619998812675476},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C2780186347","wikidata":"https://www.wikidata.org/wiki/Q11414","display_name":"Subnetwork","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.27309998869895935}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228261","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W2799053639","https://openalex.org/W2936802426","https://openalex.org/W3026777299","https://openalex.org/W3170179936","https://openalex.org/W3196368020","https://openalex.org/W3197134965","https://openalex.org/W3197358873","https://openalex.org/W3198329097","https://openalex.org/W3198486673","https://openalex.org/W3201773091","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3211424380","https://openalex.org/W3212117663","https://openalex.org/W4285605725","https://openalex.org/W4297841787","https://openalex.org/W4323022270","https://openalex.org/W4372266958","https://openalex.org/W4375869189","https://openalex.org/W4381198892","https://openalex.org/W4385822395","https://openalex.org/W4385822591","https://openalex.org/W4388579618","https://openalex.org/W4392902854","https://openalex.org/W4392903636","https://openalex.org/W4392903665","https://openalex.org/W4392904324","https://openalex.org/W4392910532","https://openalex.org/W4392910562","https://openalex.org/W4402111788","https://openalex.org/W4402111996","https://openalex.org/W4402112426","https://openalex.org/W4403791925","https://openalex.org/W4403955716"],"related_works":[],"abstract_inverted_index":{"In":[0,73,101],"recent":[1],"years,":[2],"self-supervised":[3],"learning":[4,32,58],"(SSL)":[5],"models":[6,20],"have":[7],"made":[8],"significant":[9],"progress":[10],"in":[11,155],"audio":[12,70],"deepfake":[13,71],"detection":[14],"(ADD)":[15],"tasks.":[16],"However,":[17],"existing":[18,162],"SSL":[19],"mainly":[21],"rely":[22],"on":[23,62,139,145],"large-scale":[24],"real":[25],"speech":[26],"for":[27,68],"pre-training":[28,75],"and":[29,64],"lack":[30],"the":[31,44,48,74,92,102,107,131,136,146,161],"of":[33,47,87,94,111],"spoofed":[34,89],"samples,":[35],"which":[36],"leads":[37],"to":[38,40,80,115],"susceptibility":[39],"domain":[41],"bias":[42],"during":[43],"fine-tuning":[45,103],"process":[46],"ADD":[49],"task.":[50],"To":[51],"this":[52],"end,":[53],"we":[54,77,105],"propose":[55,106],"a":[56,151],"two-stage":[57],"strategy":[59],"(Wav2DF-TSL)":[60],"based":[61],"pretraining":[63],"hierarchical":[65,108],"expert":[66],"fusion":[67],"robust":[69],"detection.":[72],"stage,":[76,104],"use":[78],"adapters":[79],"efficiently":[81],"learn":[82],"artifacts":[83],"from":[84],"3000":[85],"hours":[86],"unlabelled":[88],"speech,":[90],"improving":[91],"adaptability":[93],"front-end":[95],"features":[96],"while":[97],"mitigating":[98],"catastrophic":[99],"forgetting.":[100],"adaptive":[109],"mixture":[110],"experts":[112],"(HA-MoE)":[113],"method":[114,133],"dynamically":[116],"fuse":[117],"multi-level":[118],"spoofing":[119],"cues":[120],"through":[121],"multi-expert":[122],"collaboration":[123],"with":[124],"gated":[125],"routing.":[126],"Experimental":[127],"results":[128],"show":[129],"that":[130],"proposed":[132],"significantly":[134],"outperforms":[135],"baseline":[137],"system":[138],"all":[140],"four":[141],"benchmark":[142],"datasets,":[143],"especially":[144],"cross-domain":[147],"In-the-wild":[148],"dataset,":[149],"achieving":[150],"27.5%":[152],"relative":[153],"improvement":[154],"equal":[156],"error":[157],"rate":[158],"(EER),":[159],"outperforming":[160],"state-of-the-art":[163],"systems.":[164]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
