{"id":"https://openalex.org/W4403182579","doi":"https://doi.org/10.1109/is262782.2024.10704095","title":"Deepfake Audio Detection Using Spectrogram-based Feature and Ensemble of Deep Learning Models","display_name":"Deepfake Audio Detection Using Spectrogram-based Feature and Ensemble of Deep Learning Models","publication_year":2024,"publication_date":"2024-09-30","ids":{"openalex":"https://openalex.org/W4403182579","doi":"https://doi.org/10.1109/is262782.2024.10704095"},"language":"en","primary_location":{"id":"doi:10.1109/is262782.2024.10704095","is_oa":false,"landing_page_url":"https://doi.org/10.1109/is262782.2024.10704095","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 5th International Symposium on the Internet of Sounds (IS2)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111250608","display_name":"Lam Pham","orcid":null},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"facility","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Lam Pham","raw_affiliation_strings":["Austrian Institute of Technology,Vienna,Austria","Austrian Institute of Technology, Vienna, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Austrian Institute of Technology,Vienna,Austria","institution_ids":["https://openalex.org/I132118926"]},{"raw_affiliation_string":"Austrian Institute of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I132118926"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056967532","display_name":"Phat Lam","orcid":"https://orcid.org/0009-0003-5105-5976"},"institutions":[{"id":"https://openalex.org/I47265099","display_name":"Ho Chi Minh City University of Technology","ror":"https://ror.org/04qva2324","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I47265099"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Phat Lam","raw_affiliation_strings":["HCM University of Technology,Ho Chi Minh city,Vietnam","HCM University of Technology, Ho Chi Minh city, Vietnam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"HCM University of Technology,Ho Chi Minh city,Vietnam","institution_ids":["https://openalex.org/I47265099"]},{"raw_affiliation_string":"HCM University of Technology, Ho Chi Minh city, Vietnam","institution_ids":["https://openalex.org/I47265099"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101495770","display_name":"Truong Thanh Nguyen","orcid":"https://orcid.org/0000-0003-3139-4105"},"institutions":[{"id":"https://openalex.org/I47265099","display_name":"Ho Chi Minh City University of Technology","ror":"https://ror.org/04qva2324","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I47265099"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Truong Nguyen","raw_affiliation_strings":["HCM University of Technology,Ho Chi Minh city,Vietnam","HCM University of Technology, Ho Chi Minh city, Vietnam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"HCM University of Technology,Ho Chi Minh city,Vietnam","institution_ids":["https://openalex.org/I47265099"]},{"raw_affiliation_string":"HCM University of Technology, Ho Chi Minh city, Vietnam","institution_ids":["https://openalex.org/I47265099"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100912676","display_name":"Huyen Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I92614990","display_name":"Tokyo University of Agriculture and Technology","ror":"https://ror.org/00qg0kr10","country_code":"JP","type":"education","lineage":["https://openalex.org/I92614990"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Huyen Nguyen","raw_affiliation_strings":["Tokyo University of Agriculture and Technology,Tokyo,Japan","Tokyo University of Agriculture and Technology, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tokyo University of Agriculture and Technology,Tokyo,Japan","institution_ids":["https://openalex.org/I92614990"]},{"raw_affiliation_string":"Tokyo University of Agriculture and Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I92614990"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102746568","display_name":"Alexander Schindler","orcid":"https://orcid.org/0000-0001-6058-7753"},"institutions":[{"id":"https://openalex.org/I132118926","display_name":"Austrian Institute of Technology","ror":"https://ror.org/04knbh022","country_code":"AT","type":"facility","lineage":["https://openalex.org/I132118926"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Alexander Schindler","raw_affiliation_strings":["Austrian Institute of Technology,Vienna,Austria","Austrian Institute of Technology, Vienna, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Austrian Institute of Technology,Vienna,Austria","institution_ids":["https://openalex.org/I132118926"]},{"raw_affiliation_string":"Austrian Institute of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I132118926"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5111250608"],"corresponding_institution_ids":["https://openalex.org/I132118926"],"apc_list":null,"apc_paid":null,"fwci":9.4242,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.98795181,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9142000079154968,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9142000079154968,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.972732663154602},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7370493412017822},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6741650700569153},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.567305326461792},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5610020160675049},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.5186552405357361},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43268388509750366},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4291938543319702},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40205666422843933}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.972732663154602},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7370493412017822},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6741650700569153},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.567305326461792},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5610020160675049},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.5186552405357361},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43268388509750366},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4291938543319702},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40205666422843933},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/is262782.2024.10704095","is_oa":false,"landing_page_url":"https://doi.org/10.1109/is262782.2024.10704095","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 5th International Symposium on the Internet of Sounds (IS2)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2120847449","https://openalex.org/W2123299109","https://openalex.org/W2401839215","https://openalex.org/W2936802426","https://openalex.org/W2967606780","https://openalex.org/W2972526452","https://openalex.org/W3032636897","https://openalex.org/W3033711348","https://openalex.org/W3167533889","https://openalex.org/W4221138880","https://openalex.org/W4309049624","https://openalex.org/W4311681683","https://openalex.org/W4319977731","https://openalex.org/W4323519173","https://openalex.org/W4381854917","https://openalex.org/W4385822353","https://openalex.org/W4385822356","https://openalex.org/W4386302282","https://openalex.org/W4394019163","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2088854863","https://openalex.org/W4402568167","https://openalex.org/W3179495260","https://openalex.org/W1976719989","https://openalex.org/W3124943098","https://openalex.org/W4308112567","https://openalex.org/W3162132941"],"abstract_inverted_index":{"In":[0,44,161],"this":[1],"paper,":[2],"we":[3,91,165],"propose":[4],"a":[5,18,93,195],"deep-learning-based":[6],"system":[7],"for":[8,24],"the":[9,21,46,89,111,131,137,162,167,184,188,219,252,260,274],"task":[10,275],"of":[11,20,60,77,96,118,172,242,262,276],"deepfake":[12,278],"audio":[13,49,169,180,189,205,277],"detection.":[14,279],"This":[15],"work":[16],"is":[17,35,50,108,134,245],"part":[19],"proposed":[22,115,225],"toolchain":[23],"speech":[25],"analysis":[26],"in":[27,42,251],"EUCINF":[28],"(EUropean":[29],"Cyber":[30],"and":[31,83,158,176,265],"INFormation)":[32],"project,":[33],"which":[34,244],"an":[36,237],"European":[37],"project":[38],"with":[39,72],"multiple":[40],"partners":[41],"Europe.":[43],"particular,":[45],"raw":[47],"input":[48,185],"first":[51,106],"transformed":[52],"into":[53],"various":[54],"spectrograms":[55,112,264],"using":[56,113],"three":[57,101],"transformation":[58],"methods":[59],"Short-time":[61],"Fourier":[62],"Transform":[63,66,69],"(STFT),":[64],"Constant-Q":[65],"(CQT),":[67],"Wavelet":[68],"(WT)":[70],"combined":[71],"different":[73],"auditory-":[74],"based":[75,99],"filters":[76,81],"Mel,":[78],"Gammatone,":[79],"linear":[80],"(LF),":[82],"discrete":[84],"cosine":[85],"transform":[86],"(DCT).":[87],"Given":[88],"spectrograms,":[90],"evaluate":[92],"wide":[94],"range":[95],"classification":[97],"models":[98,117,143,171,211,226],"on":[100,227,273],"deep":[102,209,266],"learning":[103,139,210,267],"approaches.":[104],"The":[105],"approach":[107,133],"to":[109,135,178,200,217,248,269],"train":[110],"our":[114,224],"baseline":[116],"CNN-based":[119],"model":[120,124,127,199,235,271],"(CNN-":[121],"baseline),":[122],"RNN-based":[123],"(RNN-baseline),":[125],"C-RNN":[126],"(C-RNN":[128],"baseline).":[129],"Meanwhile,":[130],"second":[132],"apply":[136],"transfer":[138],"from":[140,183,212],"computer":[141],"vision":[142],"such":[144],"as":[145],"ResNet-":[146],"18,":[147],"MobileNet-V3,":[148],"EfficientNet-BO,":[149],"DenseNet-121,":[150],"SuffleNet-":[151],"V2,":[152],"Swint,":[153],"Convnext-":[154],"Tiny,":[155],"GoogLeNet,":[156],"MNASsnet,":[157],"Reg-":[159],"Net.":[160],"third":[163],"approach,":[164],"leverage":[166],"state-of-the-art":[168],"pre-trained":[170],"Whisper,":[173],"Seamless,":[174],"Speechbrain,":[175],"Pyannote":[177],"extract":[179],"embed":[181,190],"dings":[182,191],"spectrograms.":[186],"Then,":[187],"are":[192,215],"explored":[193],"by":[194],"Multilayer":[196],"perceptron":[197],"(MLP)":[198],"detect":[201],"fake":[202],"or":[203],"real":[204],"samples.":[206],"Finally,":[207],"high-performance":[208],"these":[213],"approaches":[214,268],"fused":[216],"achieve":[218],"best":[220,233],"performance.":[221],"We":[222],"evaluated":[223],"ASVspoof":[228],"2019":[229,254],"benchmark":[230],"dataset.":[231],"Our":[232],"ensemble":[234],"achieved":[236],"Equal":[238],"Error":[239],"Rate":[240],"(EER)":[241],"0.03,":[243],"highly":[246],"competitive":[247],"top-performing":[249],"systems":[250],"ASVspoofing":[253],"challenge.":[255],"Experimental":[256],"results":[257],"also":[258],"highlight":[259],"potential":[261],"selective":[263],"enhance":[270],"performance":[272]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":5}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
