{"id":"https://openalex.org/W4375869371","doi":"https://doi.org/10.1109/icassp49357.2023.10095927","title":"BTS-E: Audio Deepfake Detection Using Breathing-Talking-Silence Encoder","display_name":"BTS-E: Audio Deepfake Detection Using Breathing-Talking-Silence Encoder","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869371","doi":"https://doi.org/10.1109/icassp49357.2023.10095927"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095927","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095927","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064475040","display_name":"Thien-Phuc Doan","orcid":"https://orcid.org/0000-0001-7988-5953"},"institutions":[{"id":"https://openalex.org/I141371507","display_name":"Soongsil University","ror":"https://ror.org/017xnm587","country_code":"KR","type":"education","lineage":["https://openalex.org/I141371507"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Thien-Phuc Doan","raw_affiliation_strings":["Soongsil University,School of Electronic Engineering,Seoul,Republic of Korea","School of Electronic Engineering, Soongsil University, Seoul, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Soongsil University,School of Electronic Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I141371507"]},{"raw_affiliation_string":"School of Electronic Engineering, Soongsil University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I141371507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052565965","display_name":"Long Nguyen-Vu","orcid":"https://orcid.org/0000-0002-7764-6235"},"institutions":[{"id":"https://openalex.org/I141371507","display_name":"Soongsil University","ror":"https://ror.org/017xnm587","country_code":"KR","type":"education","lineage":["https://openalex.org/I141371507"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Long Nguyen-Vu","raw_affiliation_strings":["Soongsil University,School of Electronic Engineering,Seoul,Republic of Korea","School of Electronic Engineering, Soongsil University, Seoul, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Soongsil University,School of Electronic Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I141371507"]},{"raw_affiliation_string":"School of Electronic Engineering, Soongsil University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I141371507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031598830","display_name":"Souhwan Jung","orcid":"https://orcid.org/0000-0003-2676-3412"},"institutions":[{"id":"https://openalex.org/I141371507","display_name":"Soongsil University","ror":"https://ror.org/017xnm587","country_code":"KR","type":"education","lineage":["https://openalex.org/I141371507"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Souhwan Jung","raw_affiliation_strings":["Soongsil University,School of Electronic Engineering,Seoul,Republic of Korea","School of Electronic Engineering, Soongsil University, Seoul, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Soongsil University,School of Electronic Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I141371507"]},{"raw_affiliation_string":"School of Electronic Engineering, Soongsil University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I141371507"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062053307","display_name":"Kihun Hong","orcid":"https://orcid.org/0000-0002-5538-3630"},"institutions":[{"id":"https://openalex.org/I141371507","display_name":"Soongsil University","ror":"https://ror.org/017xnm587","country_code":"KR","type":"education","lineage":["https://openalex.org/I141371507"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kihun Hong","raw_affiliation_strings":["Soongsil University,School of Electronic Engineering,Seoul,Republic of Korea","School of Electronic Engineering, Soongsil University, Seoul, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Soongsil University,School of Electronic Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I141371507"]},{"raw_affiliation_string":"School of Electronic Engineering, Soongsil University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I141371507"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.3422,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.9781492,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8378750681877136},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6716303825378418},{"id":"https://openalex.org/keywords/silence","display_name":"Silence","score":0.5438244342803955},{"id":"https://openalex.org/keywords/breathing","display_name":"Breathing","score":0.5016858577728271},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.49618321657180786},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.47143521904945374},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.4628777503967285},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3638252317905426},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.24279406666755676},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.09262242913246155}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8378750681877136},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6716303825378418},{"id":"https://openalex.org/C2781115785","wikidata":"https://www.wikidata.org/wiki/Q502261","display_name":"Silence","level":2,"score":0.5438244342803955},{"id":"https://openalex.org/C39300077","wikidata":"https://www.wikidata.org/wiki/Q9530","display_name":"Breathing","level":2,"score":0.5016858577728271},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.49618321657180786},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.47143521904945374},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.4628777503967285},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3638252317905426},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.24279406666755676},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.09262242913246155},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095927","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095927","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6399999856948853}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1946196160","https://openalex.org/W2097450352","https://openalex.org/W2129120544","https://openalex.org/W2176804518","https://openalex.org/W2187089797","https://openalex.org/W2519091744","https://openalex.org/W2745896134","https://openalex.org/W2791846162","https://openalex.org/W2891042661","https://openalex.org/W2908510526","https://openalex.org/W2936802426","https://openalex.org/W2967606780","https://openalex.org/W3024920698","https://openalex.org/W3094897602","https://openalex.org/W3128121831","https://openalex.org/W3163596559","https://openalex.org/W3170179936","https://openalex.org/W3174758275","https://openalex.org/W3197184958","https://openalex.org/W3197358873","https://openalex.org/W3198329097","https://openalex.org/W3201016147","https://openalex.org/W3215578331","https://openalex.org/W4221161332","https://openalex.org/W4225854381","https://openalex.org/W4283067891","https://openalex.org/W4296069271","https://openalex.org/W6679444976","https://openalex.org/W6757817989","https://openalex.org/W6784088632","https://openalex.org/W6796730497","https://openalex.org/W6801032166"],"related_works":["https://openalex.org/W2140535326","https://openalex.org/W1607271848","https://openalex.org/W4251121070","https://openalex.org/W2734809835","https://openalex.org/W2184961913","https://openalex.org/W1965764303","https://openalex.org/W2750153830","https://openalex.org/W2214987084","https://openalex.org/W1964726586","https://openalex.org/W3008391423"],"abstract_inverted_index":{"Voice":[0],"phishing":[1],"(vishing)":[2],"is":[3],"increasingly":[4],"popular":[5],"due":[6],"to":[7,21,46,55,69,103,121,154],"the":[8,16,28,71,129,132,142,147,150],"development":[9],"of":[10,18,131,149],"speech":[11],"synthesis":[12],"technology.":[13],"In":[14,61,140],"particular,":[15],"use":[17,85],"deep":[19],"learning":[20],"generate":[22],"an":[23,81],"arbitrary-content":[24],"audio":[25,82],"clip":[26],"simulating":[27],"victim\u2019s":[29],"voice":[30],"makes":[31],"it":[32],"difficult":[33],"not":[34],"only":[35],"for":[36,40,88],"humans":[37],"but":[38],"also":[39],"automatic":[41],"speaker":[42],"verification":[43],"(ASV)":[44],"systems":[45,50],"distinguish.":[47],"Countermeasure":[48],"(CM)":[49],"have":[51],"been":[52],"developed":[53],"recently":[54],"help":[56],"ASV":[57],"combat":[58],"synthetic":[59],"speech.":[60],"this":[62,86],"work,":[63],"we":[64],"propose":[65],"BTS-E,":[66],"a":[67,111],"framework":[68],"evaluate":[70],"correlation":[72],"between":[73],"Breathing,":[74],"Talking":[75],"(speech),":[76],"and":[77,117],"Silence":[78],"sounds":[79],"in":[80,136],"clip,":[83],"then":[84],"information":[87],"deepfake":[89,138],"detection":[90],"tasks.":[91],"We":[92,109],"argue":[93],"that":[94],"natural":[95],"human":[96],"sounds,":[97],"such":[98],"as":[99],"breathing,":[100],"are":[101],"hard":[102],"synthesize":[104],"by":[105,152],"Text-to-speech":[106],"(TTS)":[107],"system.":[108],"conducted":[110],"large-scale":[112],"evaluation":[113,119],"using":[114],"ASVspoof":[115],"2019":[116],"2021":[118],"set":[120],"validate":[122],"our":[123],"hypothesis.":[124],"The":[125],"experiment":[126],"results":[127],"show":[128],"applicability":[130],"breathing":[133],"sound":[134],"feature":[135],"detecting":[137],"voices.":[139],"general,":[141],"proposed":[143],"system":[144],"significantly":[145],"increases":[146],"performance":[148],"classifier":[151],"up":[153],"46%.":[155]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":24},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
