{"id":"https://openalex.org/W4416036204","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.794","title":"What You Read Isn\u2019t What You Hear: Linguistic Sensitivity in Deepfake Speech Detection","display_name":"What You Read Isn\u2019t What You Hear: Linguistic Sensitivity in Deepfake Speech Detection","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036204","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.794"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.794","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.794","pdf_url":"https://aclanthology.org/2025.emnlp-main.794.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.794.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102023223","display_name":"Binh Nguyen","orcid":"https://orcid.org/0000-0002-4320-2978"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Binh Nguyen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006589840","display_name":"Shuju Shi","orcid":"https://orcid.org/0000-0002-8349-1145"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuju Shi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022252482","display_name":"Ryan Ofman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ryan Ofman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5039440984","display_name":"Thai Le","orcid":"https://orcid.org/0000-0001-9632-6870"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thai Le","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102023223"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1847044,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"15752","last_page":"15766"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.14309999346733093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.14309999346733093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.06270000338554382,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12684","display_name":"Stuttering Research and Treatment","score":0.05779999867081642,"subfield":{"id":"https://openalex.org/subfields/3203","display_name":"Clinical Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.5842000246047974},{"id":"https://openalex.org/keywords/detection-theory","display_name":"Detection theory","score":0.41040000319480896},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.32519999146461487},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.30489999055862427},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.29789999127388}],"concepts":[{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.5842000246047974},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5817000269889832},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4381999969482422},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43130001425743103},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42739999294281006},{"id":"https://openalex.org/C137270730","wikidata":"https://www.wikidata.org/wiki/Q120811","display_name":"Detection theory","level":3,"score":0.41040000319480896},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3617999851703644},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.32519999146461487},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.30489999055862427},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.28220000863075256},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.794","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.794","pdf_url":"https://aclanthology.org/2025.emnlp-main.794.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.794","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.794","pdf_url":"https://aclanthology.org/2025.emnlp-main.794.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036204.pdf","grobid_xml":"https://content.openalex.org/works/W4416036204.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2],"text-to-speech":[3],"technology":[4],"have":[5],"enabled":[6],"highly":[7],"realistic":[8],"voice":[9],"generation,":[10],"fueling":[11],"audio-based":[12],"deepfake":[13,144],"attacks":[14],"such":[15,27],"as":[16],"fraud":[17],"and":[18,55,95,121,146,164],"impersonation.While":[19],"audio":[20,107,123,143],"antispoofing":[21],"systems":[22],"are":[23,126],"critical":[24],"for":[25,68],"detecting":[26],"threats,":[28],"prior":[29],"research":[30],"has":[31],"predominantly":[32],"focused":[33],"on":[34,90,105],"acoustic-level":[35],"perturbations,":[36],"leaving":[37],"the":[38,49,96,134,156,169],"impact":[39],"of":[40,52,98,171],"linguistic":[41,50,78,119,166],"variation":[42,167],"largely":[43],"unexplored.In":[44],"this":[45],"paper,":[46],"we":[47,116,137],"investigate":[48],"sensitivity":[51],"both":[53],"open-source":[54,92],"commercial":[56,100,152],"anti-spoofing":[57,173],"detectors":[58],"by":[59],"introducing":[60],"TAPAS":[61,149],"(Transcript-to-Audio":[62],"Perturbation":[63],"Anti-Spoofing),":[64],"a":[65,111,139],"novel":[66],"framework":[67],"transcript-level":[69],"adversarial":[70],"attacks.Our":[71],"extensive":[72],"evaluation":[73],"shows":[74],"that":[75,118,148],"even":[76],"minor":[77],"perturbations":[79],"can":[80,150],"significantly":[81],"degrade":[82],"detection":[83],"accuracy:":[84],"attack":[85],"success":[86],"rates":[87],"exceed":[88],"60%":[89],"several":[91],"detector-voice":[93],"pairs,":[94],"accuracy":[97],"one":[99],"detector":[101,131],"drops":[102],"from":[103],"100%":[104],"synthetic":[106],"to":[108,130,158],"just":[109],"32%.Through":[110],"comprehensive":[112],"feature":[113],"attribution":[114],"analysis,":[115],"find":[117],"complexity":[120],"model-level":[122],"embedding":[124],"similarity":[125],"key":[127],"factors":[128],"contributing":[129],"vulnerabilities.To":[132],"illustrate":[133],"real-world":[135],"risks,":[136],"replicate":[138],"recent":[140],"Brad":[141],"Pitt":[142],"scam":[145],"demonstrate":[147],"bypass":[151],"detectors.These":[153],"findings":[154],"underscore":[155],"need":[157],"move":[159],"beyond":[160],"purely":[161],"acoustic":[162],"defenses":[163],"incorporate":[165],"into":[168],"design":[170],"robust":[172],"systems.Our":[174],"source":[175],"code":[176],"is":[177],"available":[178],"at":[179],"https:":[180],"//github.com/nqbinh17/audio_linguistic_adversarial.":[181]},"counts_by_year":[],"updated_date":"2026-03-10T14:07:55.174380","created_date":"2025-11-08T00:00:00"}
