{"id":"https://openalex.org/W4362684594","doi":"https://doi.org/10.2352/ei.2023.35.4.mwsf-372","title":"Synthetic speech attribution using self supervised audio spectrogram transformer","display_name":"Synthetic speech attribution using self supervised audio spectrogram transformer","publication_year":2023,"publication_date":"2023-01-16","ids":{"openalex":"https://openalex.org/W4362684594","doi":"https://doi.org/10.2352/ei.2023.35.4.mwsf-372"},"language":"en","primary_location":{"id":"doi:10.2352/ei.2023.35.4.mwsf-372","is_oa":true,"landing_page_url":"https://doi.org/10.2352/ei.2023.35.4.mwsf-372","pdf_url":"https://library.imaging.org/admin/apis/public/api/ist/website/downloadArticle/ei/35/4/MWSF-372","source":{"id":"https://openalex.org/S4210227276","display_name":"Electronic Imaging","issn_l":"2470-1173","issn":["2470-1173"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Electronic Imaging","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://library.imaging.org/admin/apis/public/api/ist/website/downloadArticle/ei/35/4/MWSF-372","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055125973","display_name":"Amit Kumar Singh Yadav","orcid":"https://orcid.org/0000-0001-6464-7688"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Amit Kumar Singh Yadav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062428716","display_name":"Emily R. Bartusiak","orcid":"https://orcid.org/0000-0002-8202-8853"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Emily R. Bartusiak","raw_affiliation_strings":["Video & Image Processing Laboratory , School of Electrical and Computer Engineering , Purdue University , West Lafayette , IN , USA"],"affiliations":[{"raw_affiliation_string":"Video & Image Processing Laboratory , School of Electrical and Computer Engineering , Purdue University , West Lafayette , IN , USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022400610","display_name":"Kratika Bhagtani","orcid":"https://orcid.org/0000-0001-7399-4876"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kratika Bhagtani","raw_affiliation_strings":["Video & Image Processing Laboratory , School of Electrical and Computer Engineering , Purdue University , West Lafayette , IN , USA"],"affiliations":[{"raw_affiliation_string":"Video & Image Processing Laboratory , School of Electrical and Computer Engineering , Purdue University , West Lafayette , IN , USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089688702","display_name":"Edward J. Delp","orcid":"https://orcid.org/0000-0002-2909-7323"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edward J. Delp","raw_affiliation_strings":["Video & Image Processing Laboratory , School of Electrical and Computer Engineering , Purdue University , West Lafayette , IN , USA"],"affiliations":[{"raw_affiliation_string":"Video & Image Processing Laboratory , School of Electrical and Computer Engineering , Purdue University , West Lafayette , IN , USA","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5055125973"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.8347,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.92249611,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"35","issue":"4","first_page":"372","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8077230453491211},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.769251823425293},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7129591107368469},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6811355352401733},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.5170541405677795},{"id":"https://openalex.org/keywords/attribution","display_name":"Attribution","score":0.4996955394744873},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43986788392066956},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3351542353630066},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07227912545204163},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.05851089954376221}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8077230453491211},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.769251823425293},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7129591107368469},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6811355352401733},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.5170541405677795},{"id":"https://openalex.org/C143299363","wikidata":"https://www.wikidata.org/wiki/Q900584","display_name":"Attribution","level":2,"score":0.4996955394744873},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43986788392066956},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3351542353630066},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07227912545204163},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.05851089954376221},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.2352/ei.2023.35.4.mwsf-372","is_oa":true,"landing_page_url":"https://doi.org/10.2352/ei.2023.35.4.mwsf-372","pdf_url":"https://library.imaging.org/admin/apis/public/api/ist/website/downloadArticle/ei/35/4/MWSF-372","source":{"id":"https://openalex.org/S4210227276","display_name":"Electronic Imaging","issn_l":"2470-1173","issn":["2470-1173"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Electronic Imaging","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.2352/ei.2023.35.4.mwsf-372","is_oa":true,"landing_page_url":"https://doi.org/10.2352/ei.2023.35.4.mwsf-372","pdf_url":"https://library.imaging.org/admin/apis/public/api/ist/website/downloadArticle/ei/35/4/MWSF-372","source":{"id":"https://openalex.org/S4210227276","display_name":"Electronic Imaging","issn_l":"2470-1173","issn":["2470-1173"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Electronic Imaging","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7300000190734863,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4362684594.pdf","grobid_xml":"https://content.openalex.org/works/W4362684594.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2897924318","https://openalex.org/W2138997758","https://openalex.org/W1911859126","https://openalex.org/W2120730869","https://openalex.org/W2541680182","https://openalex.org/W2118508889","https://openalex.org/W2166699153"],"abstract_inverted_index":{"The":[0],"ability":[1,144],"to":[2,11,145,147,184],"synthesize":[3],"convincing":[4],"human":[5],"speech":[6,15,30,38,47,60,95,149,192],"has":[7,155],"become":[8],"easier":[9],"due":[10],"the":[12,20,43,90,98,104,108,142,165],"availability":[13],"of":[14,22,45,162],"generation":[16,150],"tools.":[17],"This":[18,73],"necessitates":[19],"development":[21],"forensics":[23],"methods":[24,151],"that":[25,179],"can":[26],"authenticate":[27],"and":[28,65,86,107,131,168],"attribute":[29],"signals.":[31,193],"In":[32],"this":[33],"paper,":[34],"we":[35,177],"examine":[36],"a":[37,46,67],"attribution":[39,96],"task,":[40],"which":[41],"identifies":[42],"origin":[44],"signal.":[48],"Our":[49],"proposed":[50],"method":[51],"known":[52],"as":[53],"Synthetic":[54],"Speech":[55],"Attribution":[56,102,137,174],"Transformer":[57],"(SSAT)":[58],"converts":[59],"signals":[61],"into":[62],"mel":[63],"spectrograms":[64],"uses":[66],"self-supervised":[68],"pretrained":[69,76,91],"transformer":[70,74,92],"for":[71,191],"attribution.":[72],"is":[75,182],"on":[77,93,119,123,127,133,164,170],"two":[78],"large":[79],"publicly":[80],"available":[81],"audio":[82],"datasets:":[83,97],"Audio":[84,101,136,173],"Set":[85],"LibriSpeech.":[87],"We":[88,139],"finetune":[89],"three":[94],"DARPA":[99,134,171],"SemaFor":[100,135,172],"dataset,":[103,106,125,130],"ASVspoof2019":[105,124,166],"2022":[109],"IEEE":[110],"SP":[111,128],"Cup":[112,129],"dataset.":[113,175],"SSAT":[114,154],"achieves":[115],"high":[116,156],"closed-set":[117],"accuracy":[118,161],"all":[120],"datasets":[121],"(99.8%":[122],"96.3%":[126],"93.4%":[132],"dataset).":[138],"also":[140],"investigate":[141],"method&#xE2;&#x20AC;&#x2122;s":[143],"generalize":[146],"unknown":[148],"(open-set":[152],"scenario).":[153],"performance,":[157],"achieving":[158],"an":[159],"open-set":[160],"90.2%":[163],"dataset":[167],"88.45%":[169],"Finally,":[176],"show":[178],"our":[180],"approach":[181],"robust":[183],"typical":[185],"compression":[186],"rates":[187],"used":[188],"by":[189],"YouTube":[190]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":6}],"updated_date":"2026-03-10T14:07:55.174380","created_date":"2025-10-10T00:00:00"}
