{"id":"https://openalex.org/W4372269174","doi":"https://doi.org/10.1109/icassp49357.2023.10095043","title":"ASSD: Synthetic Speech Detection in the AAC Compressed Domain","display_name":"ASSD: Synthetic Speech Detection in the AAC Compressed Domain","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372269174","doi":"https://doi.org/10.1109/icassp49357.2023.10095043"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095043","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095043","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055125973","display_name":"Amit Kumar Singh Yadav","orcid":"https://orcid.org/0000-0001-6464-7688"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Amit Kumar Singh Yadav","raw_affiliation_strings":["Purdue University,Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering,West Lafayette,Indiana,USA","Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University,Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering,West Lafayette,Indiana,USA","institution_ids":["https://openalex.org/I219193219"]},{"raw_affiliation_string":"Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057999558","display_name":"Ziyue Xiang","orcid":"https://orcid.org/0000-0001-6054-5801"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziyue Xiang","raw_affiliation_strings":["Purdue University,Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering,West Lafayette,Indiana,USA","Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University,Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering,West Lafayette,Indiana,USA","institution_ids":["https://openalex.org/I219193219"]},{"raw_affiliation_string":"Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062428716","display_name":"Emily R. Bartusiak","orcid":"https://orcid.org/0000-0002-8202-8853"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Emily R. Bartusiak","raw_affiliation_strings":["Purdue University,Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering,West Lafayette,Indiana,USA","Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University,Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering,West Lafayette,Indiana,USA","institution_ids":["https://openalex.org/I219193219"]},{"raw_affiliation_string":"Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051370303","display_name":"Paolo Bestagini","orcid":"https://orcid.org/0000-0003-0406-0222"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Paolo Bestagini","raw_affiliation_strings":["Politecnico di Milano,Dipartimento di Elettronica, Informazione e Bioingegneria,Milano,Italy","Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano,Dipartimento di Elettronica, Informazione e Bioingegneria,Milano,Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005378965","display_name":"Stefano Tubaro","orcid":"https://orcid.org/0000-0002-1990-9869"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Stefano Tubaro","raw_affiliation_strings":["Politecnico di Milano,Dipartimento di Elettronica, Informazione e Bioingegneria,Milano,Italy","Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano,Dipartimento di Elettronica, Informazione e Bioingegneria,Milano,Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089688702","display_name":"Edward J. Delp","orcid":"https://orcid.org/0000-0002-2909-7323"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edward J. Delp","raw_affiliation_strings":["Purdue University,Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering,West Lafayette,Indiana,USA","Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University,Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering,West Lafayette,Indiana,USA","institution_ids":["https://openalex.org/I219193219"]},{"raw_affiliation_string":"Video and Image Processing Lab (VIPER), School of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5055125973"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":1.0189,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74816901,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"15","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.794355034828186},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6712140440940857},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.6364799737930298},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.5661799311637878},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.5520845651626587},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.36139118671417236},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.344948410987854},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3304566740989685}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.794355034828186},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6712140440940857},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.6364799737930298},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5661799311637878},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.5520845651626587},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.36139118671417236},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.344948410987854},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3304566740989685}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095043","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095043","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5099999904632568,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320338294","display_name":"Air Force Research Laboratory","ror":"https://ror.org/02e2egq70"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1474022922","https://openalex.org/W2062826588","https://openalex.org/W2066452495","https://openalex.org/W2101807845","https://openalex.org/W2103869314","https://openalex.org/W2128301448","https://openalex.org/W2520774990","https://openalex.org/W2567375321","https://openalex.org/W2571081683","https://openalex.org/W2590129515","https://openalex.org/W2733416080","https://openalex.org/W2809971434","https://openalex.org/W2888728157","https://openalex.org/W2936802426","https://openalex.org/W2955054437","https://openalex.org/W2964135678","https://openalex.org/W2967606780","https://openalex.org/W3157576687","https://openalex.org/W3160614129","https://openalex.org/W3161011913","https://openalex.org/W3163573274","https://openalex.org/W3169317518","https://openalex.org/W3170179936","https://openalex.org/W3196974791","https://openalex.org/W3205475937","https://openalex.org/W3206996142","https://openalex.org/W3213515747","https://openalex.org/W3216567096","https://openalex.org/W4214896311","https://openalex.org/W4221149301","https://openalex.org/W4224986264","https://openalex.org/W4246413330","https://openalex.org/W4293363567","https://openalex.org/W4362684594","https://openalex.org/W6630236247","https://openalex.org/W6631190155","https://openalex.org/W6726946684","https://openalex.org/W6739901393","https://openalex.org/W6748588790","https://openalex.org/W6755207826","https://openalex.org/W6776384377","https://openalex.org/W6778823374","https://openalex.org/W6784333009","https://openalex.org/W6792861227","https://openalex.org/W6795261426","https://openalex.org/W6796464841","https://openalex.org/W6802510933","https://openalex.org/W6810556107","https://openalex.org/W6828183479"],"related_works":["https://openalex.org/W2897924318","https://openalex.org/W2138997758","https://openalex.org/W191108438","https://openalex.org/W1911859126","https://openalex.org/W2541680182","https://openalex.org/W642007152","https://openalex.org/W2131711534","https://openalex.org/W2559040841","https://openalex.org/W114661351","https://openalex.org/W2056066842"],"abstract_inverted_index":{"Synthetic":[0,74],"human":[1],"speech":[2,64,90,134,144,159],"signals":[3,16,65],"have":[4],"become":[5],"very":[6],"easy":[7],"to":[8,36,54,110,124,167,191],"generate":[9],"given":[10],"modern":[11],"text-to-speech":[12],"methods.":[13,136,178,193],"When":[14],"these":[15],"are":[17,23],"shared":[18],"on":[19,140],"social":[20],"media":[21],"they":[22],"often":[24],"compressed":[25,49,84,105,143,164],"using":[26,96,114],"the":[27,47,63,82,89,94,106,111,120,162],"Advanced":[28],"Audio":[29],"Coding":[30],"(AAC)":[31],"standard.":[32],"Our":[33,179],"goal":[34],"is":[35,52,172],"study":[37],"if":[38],"a":[39,97,125,130,183],"small":[40],"set":[41],"of":[42,62,122],"coding":[43],"metadata":[44],"contained":[45],"in":[46],"AAC":[48,73,83,112,163],"bit":[50,85,165],"stream":[51,86,166],"sufficient":[53],"detect":[55,168],"synthetic":[56,133,169],"speech.":[57,170],"This":[58,171],"would":[59],"avoid":[60],"decompressing":[61,88],"before":[66],"analysis.":[67],"We":[68,118,137],"call":[69],"our":[70,102,150],"proposed":[71,151],"method":[72,152,180],"Speech":[75],"Detection":[76],"(ASSD).":[77],"ASSD":[78,92,123,139],"extracts":[79],"information":[80,95],"from":[81,161],"without":[87],"signal.":[91],"analyzes":[93],"transformer":[98],"neural":[99],"network.":[100],"In":[101],"experiments,":[103],"we":[104],"ASVspoof2019":[107],"dataset":[108],"according":[109],"standard":[113],"different":[115],"data":[116],"rates.":[117],"compared":[119,190],"performance":[121],"time":[126],"domain":[127],"based":[128,132],"and":[129],"spectrogram":[131],"detection":[135,188],"evaluated":[138],"approximately":[141],"71k":[142],"signals.":[145],"The":[146],"results":[147],"show":[148],"that":[149],"typically":[153],"only":[154],"requires":[155],"1000":[156],"bits":[157],"per":[158],"block/frame":[160],"much":[173],"lower":[174],"than":[175],"other":[176],"reported":[177],"also":[181],"had":[182],"9.7":[184],"percentage":[185],"points":[186],"higher":[187],"accuracy":[189],"existing":[192]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
