{"id":"https://openalex.org/W3015469941","doi":"https://doi.org/10.1109/icassp40776.2020.9054322","title":"Adversarial Multi-Task Learning for Speaker Normalization in Replay Detection","display_name":"Adversarial Multi-Task Learning for Speaker Normalization in Replay Detection","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015469941","doi":"https://doi.org/10.1109/icassp40776.2020.9054322","mag":"3015469941"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054322","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054322","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038611899","display_name":"Gajan Suthokumar","orcid":null},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Gajan Suthokumar","raw_affiliation_strings":["School of Electrical Engineering and Telecommunications, UNSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Telecommunications, UNSW, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032689109","display_name":"Vidhyasaharan Sethu","orcid":"https://orcid.org/0000-0001-8492-1787"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Vidhyasaharan Sethu","raw_affiliation_strings":["School of Electrical Engineering and Telecommunications, UNSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Telecommunications, UNSW, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063636547","display_name":"Kaavya Sriskandaraja","orcid":"https://orcid.org/0000-0002-8625-4022"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Kaavya Sriskandaraja","raw_affiliation_strings":["School of Electrical Engineering and Telecommunications, UNSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Telecommunications, UNSW, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028116210","display_name":"Eliathamby Ambikairajah","orcid":"https://orcid.org/0000-0003-4673-6534"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Eliathamby Ambikairajah","raw_affiliation_strings":["School of Electrical Engineering and Telecommunications, UNSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Telecommunications, UNSW, Australia","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5038611899"],"corresponding_institution_ids":["https://openalex.org/I31746571"],"apc_list":null,"apc_paid":null,"fwci":0.9279,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.7987798,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"28","issue":null,"first_page":"6609","last_page":"6613"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8293553590774536},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.8181231617927551},{"id":"https://openalex.org/keywords/spoofing-attack","display_name":"Spoofing attack","score":0.7611420154571533},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.737411618232727},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.6265832781791687},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.6200681924819946},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5872063636779785},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.574070930480957},{"id":"https://openalex.org/keywords/replay-attack","display_name":"Replay attack","score":0.5169519782066345},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5131798386573792},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5054144859313965},{"id":"https://openalex.org/keywords/biometrics","display_name":"Biometrics","score":0.45973479747772217},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4400673806667328},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4269603490829468},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3258405923843384},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.323874831199646},{"id":"https://openalex.org/keywords/authentication","display_name":"Authentication (law)","score":0.07791063189506531}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8293553590774536},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8181231617927551},{"id":"https://openalex.org/C167900197","wikidata":"https://www.wikidata.org/wiki/Q11081100","display_name":"Spoofing attack","level":2,"score":0.7611420154571533},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.737411618232727},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.6265832781791687},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.6200681924819946},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5872063636779785},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.574070930480957},{"id":"https://openalex.org/C11560541","wikidata":"https://www.wikidata.org/wiki/Q1756025","display_name":"Replay attack","level":3,"score":0.5169519782066345},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5131798386573792},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5054144859313965},{"id":"https://openalex.org/C184297639","wikidata":"https://www.wikidata.org/wiki/Q177765","display_name":"Biometrics","level":2,"score":0.45973479747772217},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4400673806667328},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4269603490829468},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3258405923843384},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.323874831199646},{"id":"https://openalex.org/C148417208","wikidata":"https://www.wikidata.org/wiki/Q4825882","display_name":"Authentication (law)","level":2,"score":0.07791063189506531},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054322","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054322","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6600000262260437,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1731081199","https://openalex.org/W1997604360","https://openalex.org/W2099471712","https://openalex.org/W2123299109","https://openalex.org/W2295072214","https://openalex.org/W2510867321","https://openalex.org/W2565961899","https://openalex.org/W2745744274","https://openalex.org/W2745896134","https://openalex.org/W2747024632","https://openalex.org/W2749090340","https://openalex.org/W2807325376","https://openalex.org/W2888989492","https://openalex.org/W2889084898","https://openalex.org/W2889361425","https://openalex.org/W2889380691","https://openalex.org/W2913340405","https://openalex.org/W2920907768","https://openalex.org/W2937328535","https://openalex.org/W2940385840","https://openalex.org/W2947567516","https://openalex.org/W2962897020","https://openalex.org/W2963447013","https://openalex.org/W2964002811","https://openalex.org/W4320013936","https://openalex.org/W6637618735","https://openalex.org/W6761649062"],"related_works":["https://openalex.org/W4294437891","https://openalex.org/W4226389478","https://openalex.org/W2790784932","https://openalex.org/W4297792928","https://openalex.org/W3113108043","https://openalex.org/W3007059209","https://openalex.org/W4221161333","https://openalex.org/W2061278248","https://openalex.org/W4298202768","https://openalex.org/W4220832730"],"abstract_inverted_index":{"Spoofing":[0],"detection":[1,97],"algorithms":[2],"in":[3,11],"voice":[4],"biometrics":[5],"are":[6],"adversely":[7],"affected":[8],"by":[9],"differences":[10],"the":[12,16,61,69,76,79,86,93,124],"speech":[13,57],"characteristics":[14],"of":[15,71,78,119,131],"various":[17],"target":[18],"users.":[19],"In":[20],"this":[21,38],"paper,":[22],"we":[23,91],"propose":[24],"a":[25,48],"novel":[26],"speaker":[27,39,72,81],"normalisation":[28,82],"technique":[29,83,94],"that":[30,51,130],"employs":[31],"adversarial":[32],"multi-task":[33],"learning":[34],"to":[35,46],"compensate":[36],"for":[37],"variability.":[40],"The":[41],"proposed":[42,80],"system":[43],"is":[44],"designed":[45],"learn":[47],"feature":[49,87],"space":[50],"discriminates":[52],"between":[53,63],"genuine":[54],"and":[55,74,108,112,121],"replayed":[56],"while":[58],"simultaneously":[59],"reduces":[60],"discrimination":[62],"different":[64,103],"speakers.":[65],"We":[66,116],"initially":[67],"characterise":[68],"impact":[70],"variability":[73],"quantify":[75],"effect":[77],"directly":[84],"on":[85,95,101,123],"distributions.":[88],"Following":[89],"this,":[90],"validate":[92],"spoofing":[96],"experiments":[98],"carried":[99],"out":[100],"two":[102,125],"corpora,":[104],"ASVSpoof":[105],"2017":[106],"v2.0":[107],"BTAS":[109],"2016":[110],"replay,":[111],"demonstrate":[113],"its":[114],"effectiveness.":[115],"obtain":[117],"EER":[118],"7.11%":[120],"0.83%":[122],"corpora":[126],"respectively,":[127],"lower":[128],"than":[129],"all":[132],"relevant":[133],"baselines.":[134]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
