{"id":"https://openalex.org/W4225527248","doi":"https://doi.org/10.21437/odyssey.2022-16","title":"Automatic Speaker Verification Spoofing and Deepfake Detection Using Wav2vec 2.0 and Data Augmentation","display_name":"Automatic Speaker Verification Spoofing and Deepfake Detection Using Wav2vec 2.0 and Data Augmentation","publication_year":2022,"publication_date":"2022-06-17","ids":{"openalex":"https://openalex.org/W4225527248","doi":"https://doi.org/10.21437/odyssey.2022-16"},"language":"en","primary_location":{"id":"doi:10.21437/odyssey.2022-16","is_oa":false,"landing_page_url":"https://doi.org/10.21437/odyssey.2022-16","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Speaker and Language Recognition Workshop (Odyssey 2022)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2202.12233","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018712252","display_name":"Hemlata Tak","orcid":"https://orcid.org/0000-0002-0102-523X"},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Hemlata Tak","raw_affiliation_strings":["Eurecom [Sophia Antipolis]"],"affiliations":[{"raw_affiliation_string":"Eurecom [Sophia Antipolis]","institution_ids":["https://openalex.org/I1902872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049594655","display_name":"Massimiliano Todisco","orcid":"https://orcid.org/0000-0003-2883-0324"},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Massimiliano Todisco","raw_affiliation_strings":["Eurecom [Sophia Antipolis]"],"affiliations":[{"raw_affiliation_string":"Eurecom [Sophia Antipolis]","institution_ids":["https://openalex.org/I1902872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327839","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8246-0606"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":["National Institute of Informatics"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091834525","display_name":"Jee-weon Jung","orcid":"https://orcid.org/0000-0003-0505-2988"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jee-weon Jung","raw_affiliation_strings":["Naver Corporation"],"affiliations":[{"raw_affiliation_string":"Naver Corporation","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["National Institute of Informatics"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066811192","display_name":"Nicholas Evans","orcid":"https://orcid.org/0000-0002-8459-1041"},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Nicholas Evans","raw_affiliation_strings":["Eurecom [Sophia Antipolis]"],"affiliations":[{"raw_affiliation_string":"Eurecom [Sophia Antipolis]","institution_ids":["https://openalex.org/I1902872"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5018712252"],"corresponding_institution_ids":["https://openalex.org/I1902872"],"apc_list":null,"apc_paid":null,"fwci":23.1846,"has_fulltext":true,"cited_by_count":178,"citation_normalized_percentile":{"value":0.99623052,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"112","last_page":"119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9814000129699707,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9732999801635742,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8143553733825684},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.7978593707084656},{"id":"https://openalex.org/keywords/spoofing-attack","display_name":"Spoofing attack","score":0.7593144178390503},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4907001256942749},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3743691146373749},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.16630348563194275}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8143553733825684},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.7978593707084656},{"id":"https://openalex.org/C167900197","wikidata":"https://www.wikidata.org/wiki/Q11081100","display_name":"Spoofing attack","level":2,"score":0.7593144178390503},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4907001256942749},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3743691146373749},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.16630348563194275}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/odyssey.2022-16","is_oa":false,"landing_page_url":"https://doi.org/10.21437/odyssey.2022-16","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Speaker and Language Recognition Workshop (Odyssey 2022)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2202.12233","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.12233","pdf_url":"https://arxiv.org/pdf/2202.12233","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:HAL:hal-03624316v1","is_oa":false,"landing_page_url":"https://hal.science/hal-03624316","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2022","raw_type":"Preprints, Working Papers, ..."}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2202.12233","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.12233","pdf_url":"https://arxiv.org/pdf/2202.12233","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4225527248.pdf","grobid_xml":"https://content.openalex.org/works/W4225527248.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4294437891","https://openalex.org/W4226389478","https://openalex.org/W2790784932","https://openalex.org/W4297792928","https://openalex.org/W4221161333","https://openalex.org/W2061278248","https://openalex.org/W3113108043","https://openalex.org/W4298202768","https://openalex.org/W1516392727","https://openalex.org/W2140022733"],"abstract_inverted_index":{"The":[0],"performance":[1],"of":[2,10,36,55,104],"spoofing":[3],"countermeasure":[4],"systems":[5],"depends":[6],"fundamentally":[7],"upon\\nthe":[8],"use":[9],"sufficiently":[11],"representative":[12],"training":[13],"data.":[14],"With":[15],"this":[16,45],"usually":[17],"being\\nlimited,":[18],"current":[19],"solutions":[20],"typically":[21],"lack":[22],"generalisation":[23],"to":[24,30,102,108],"attacks":[25,38],"encountered\\nin":[26],"the":[27,34,53,78,88],"wild.":[28],"Strategies":[29],"improve":[31],"reliability":[32],"in":[33,44,52],"face":[35],"uncontrolled,\\nunpredictable":[37],"are":[39],"hence":[40],"needed.":[41],"We":[42],"report":[43],"paper":[46],"our":[47,109],"efforts":[48],"to\\nuse":[49],"self-supervised":[50],"learning":[51],"form":[54],"a":[56],"wav2vec":[57],"2.0":[58],"front-end":[59],"with":[60,97],"fine\\ntuning.":[61],"Despite":[62],"initial":[63],"base":[64],"representations":[65],"being":[66],"learned":[67],"using":[68],"only":[69],"bona":[70],"fide\\ndata":[71],"and":[72,93],"no":[73],"spoofed":[74],"data,":[75],"we":[76],"obtain":[77],"lowest":[79],"equal":[80],"error":[81],"rates":[82],"reported":[83],"in\\nthe":[84],"literature":[85],"for":[86],"both":[87],"ASVspoof":[89],"2021":[90],"Logical":[91],"Access":[92],"Deepfake\\ndatabases.":[94],"When":[95],"combined":[96],"data":[98],"augmentation,these":[99],"results":[100],"correspond":[101],"an\\nimprovement":[103],"almost":[105],"90%":[106],"relative":[107],"baseline":[110],"system.\\n":[111]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":94},{"year":2024,"cited_by_count":42},{"year":2023,"cited_by_count":22},{"year":2022,"cited_by_count":9}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
