{"id":"https://openalex.org/W4416251876","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227892","title":"Omni-Dimensional Dynamic Convolution with Global-Local Multi-Scale Aggregation for Robust Spoof Speech Detection","display_name":"Omni-Dimensional Dynamic Convolution with Global-Local Multi-Scale Aggregation for Robust Spoof Speech Detection","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416251876","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227892"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11227892","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227892","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066496238","display_name":"Hai Zhong","orcid":"https://orcid.org/0009-0007-0444-5153"},"institutions":[{"id":"https://openalex.org/I56934997","display_name":"Changsha University of Science and Technology","ror":"https://ror.org/03yph8055","country_code":"CN","type":"education","lineage":["https://openalex.org/I56934997"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hai Zhong","raw_affiliation_strings":["Changsha University of Science &#x0026; Technology,Changsha,China"],"affiliations":[{"raw_affiliation_string":"Changsha University of Science &#x0026; Technology,Changsha,China","institution_ids":["https://openalex.org/I56934997"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071358168","display_name":"Wenhao Wang","orcid":"https://orcid.org/0009-0000-6509-830X"},"institutions":[{"id":"https://openalex.org/I56934997","display_name":"Changsha University of Science and Technology","ror":"https://ror.org/03yph8055","country_code":"CN","type":"education","lineage":["https://openalex.org/I56934997"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhao Wang","raw_affiliation_strings":["Changsha University of Science &#x0026; Technology,Changsha,China"],"affiliations":[{"raw_affiliation_string":"Changsha University of Science &#x0026; Technology,Changsha,China","institution_ids":["https://openalex.org/I56934997"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100405925","display_name":"Jin Zhang","orcid":"https://orcid.org/0000-0001-9346-8590"},"institutions":[{"id":"https://openalex.org/I56934997","display_name":"Changsha University of Science and Technology","ror":"https://ror.org/03yph8055","country_code":"CN","type":"education","lineage":["https://openalex.org/I56934997"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Zhang","raw_affiliation_strings":["Changsha University of Science &#x0026; Technology,Changsha,China"],"affiliations":[{"raw_affiliation_string":"Changsha University of Science &#x0026; Technology,Changsha,China","institution_ids":["https://openalex.org/I56934997"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5066496238"],"corresponding_institution_ids":["https://openalex.org/I56934997"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19495184,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7785999774932861,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7785999774932861,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.11810000240802765,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10828","display_name":"Biometric Identification and Security","score":0.030300000682473183,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.6689000129699707},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5478000044822693},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.5424000024795532},{"id":"https://openalex.org/keywords/spoofing-attack","display_name":"Spoofing attack","score":0.510699987411499},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.45879998803138733},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.44209998846054077},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.439300000667572}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8489999771118164},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.6689000129699707},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6001999974250793},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5478000044822693},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5424000024795532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5315999984741211},{"id":"https://openalex.org/C167900197","wikidata":"https://www.wikidata.org/wiki/Q11081100","display_name":"Spoofing attack","level":2,"score":0.510699987411499},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.45879998803138733},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.44209998846054077},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.439300000667572},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4325000047683716},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4052000045776367},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3296999931335449},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.3287000060081482},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.3142000138759613},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.29280000925064087}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11227892","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227892","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2152859600","https://openalex.org/W2471520273","https://openalex.org/W2696967604","https://openalex.org/W2747024632","https://openalex.org/W2752782242","https://openalex.org/W2794506738","https://openalex.org/W2799053639","https://openalex.org/W2936802426","https://openalex.org/W2964052309","https://openalex.org/W3127781933","https://openalex.org/W3128666957","https://openalex.org/W3161011913","https://openalex.org/W3186285959","https://openalex.org/W3197358873","https://openalex.org/W3197642003","https://openalex.org/W3198837656","https://openalex.org/W3199161700","https://openalex.org/W3206189675","https://openalex.org/W3206191467","https://openalex.org/W3211424380","https://openalex.org/W4206285316","https://openalex.org/W4221104128","https://openalex.org/W4221138880","https://openalex.org/W4225527248","https://openalex.org/W4226264925","https://openalex.org/W4297841376","https://openalex.org/W4306867196","https://openalex.org/W4372260253","https://openalex.org/W4372260481","https://openalex.org/W4372266958","https://openalex.org/W4385822395","https://openalex.org/W4386784299","https://openalex.org/W4392902854","https://openalex.org/W4392902878","https://openalex.org/W4392903271","https://openalex.org/W4392903636","https://openalex.org/W4392904324","https://openalex.org/W4399311019"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,13,128,134,140,146,155,174,186,201,208],"rapid":[2],"development":[3],"of":[4,15,142,150,171,183,203,212],"deep":[5],"learning":[6],"technology":[7],"and":[8,22,58,73,84,107,118,179,210],"Artificial":[9],"Intelligence":[10],"Generated":[11],"Content,":[12],"number":[14],"spoof":[16,88,143],"speech":[17,54,144],"samples":[18],"generated":[19,55],"by":[20,56,137],"Text-To-Speech":[21,57],"Voice":[23,59],"Conversion":[24,60],"algorithms":[25],"has":[26],"surged,":[27],"posing":[28],"significant":[29],"threats":[30],"to":[31,51,113,206],"Automatic":[32],"Speaker":[33],"Verification":[34],"(ASV)":[35],"system.":[36],"To":[37],"address":[38],"this":[39],"challenge,":[40],"we":[41],"propose":[42,63],"an":[43,166],"innovative":[44],"Spoof":[45],"Speech":[46],"Detection":[47],"(SSD)":[48],"system":[49,66,164],"designed":[50],"identify":[52],"synthetic":[53],"algorithms.":[61],"We":[62],"a":[64,180],"novel":[65],"leveraging":[67],"Global-Local":[68,75],"Omni-Dimensional":[69],"Dynamic":[70],"Convolution":[71],"(GLODC)":[72],"Multi-Scale":[74],"Feature":[76],"Aggregation":[77],"(MGLFA),":[78],"which":[79],"dynamically":[80,96],"captures":[81],"both":[82,116],"local":[83,117],"global":[85,119],"artifacts":[86,120],"in":[87,121,145,193],"speech,":[89],"significantly":[90],"improving":[91],"detection":[92],"accuracy.":[93],"The":[94,124],"GLODC":[95],"adjusts":[97],"across":[98],"multiple":[99],"dimensions,":[100],"such":[101],"as":[102],"spatial":[103],"size,":[104],"input":[105],"channels,":[106],"convolutional":[108],"kernels.":[109],"This":[110],"enables":[111],"it":[112],"effectively":[114],"detect":[115],"feature":[122],"maps.":[123],"MGLFA,":[125],"improved":[126],"from":[127],"Res2Net":[129],"architecture,":[130],"focuses":[131],"particularly":[132],"on":[133,139,173,185],"frequency":[135],"components":[136],"capitalizing":[138],"characteristics":[141],"less":[147],"refined":[148],"processing":[149],"high-frequency":[151],"parts,":[152],"thereby":[153],"enhancing":[154],"model\u2019s":[156],"generalization":[157],"ability.":[158],"Experimental":[159],"results":[160],"show":[161],"that":[162],"our":[163,204],"achieves":[165],"Equal":[167],"Error":[168],"Rate":[169],"(EER)":[170],"0.61%":[172],"ASVspoof":[175],"2021":[176],"LA":[177],"dataset":[178],"weighted":[181],"EER":[182],"10.12%":[184],"ADD":[187],"2023":[188],"dataset,":[189],"demonstrating":[190],"its":[191],"effectiveness":[192],"detecting":[194],"diverse":[195],"spoofing":[196],"attacks.":[197],"These":[198],"findings":[199],"highlight":[200],"potential":[202],"approach":[205],"enhance":[207],"security":[209],"reliability":[211],"ASV":[213],"systems.":[214]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
