{"id":"https://openalex.org/W4372267288","doi":"https://doi.org/10.1109/icassp49357.2023.10096848","title":"Noise-Disentanglement Metric Learning for Robust Speaker Verification","display_name":"Noise-Disentanglement Metric Learning for Robust Speaker Verification","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372267288","doi":"https://doi.org/10.1109/icassp49357.2023.10096848"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096848","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096848","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054764710","display_name":"Yao Sun","orcid":"https://orcid.org/0000-0002-4391-377X"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yao Sun","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041985945","display_name":"Hanyi Zhang","orcid":"https://orcid.org/0000-0003-0109-6481"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanyi Zhang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050763764","display_name":"Longbiao Wang","orcid":"https://orcid.org/0000-0002-4005-5036"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longbiao Wang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004287909","display_name":"Kong Aik Lee","orcid":"https://orcid.org/0000-0001-9133-3000"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Kong Aik Lee","raw_affiliation_strings":["A*STAR,Institute for Infocomm Research,Singapore","Institute for Infocomm Research, A*STAR, Singapore"],"affiliations":[{"raw_affiliation_string":"A*STAR,Institute for Infocomm Research,Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Institute for Infocomm Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100457438","display_name":"Meng Liu","orcid":"https://orcid.org/0000-0002-1582-5764"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Liu","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017251198","display_name":"Jianwu Dang","orcid":"https://orcid.org/0000-0002-9237-4821"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwu Dang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5054764710"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":2.0948,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.89369029,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.755418598651886},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6816744208335876},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6627597808837891},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.6483784914016724},{"id":"https://openalex.org/keywords/decoupling","display_name":"Decoupling (probability)","score":0.5517586469650269},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5510717034339905},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5458112955093384},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5376862287521362},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4965868592262268},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.4484194219112396},{"id":"https://openalex.org/keywords/noise-measurement","display_name":"Noise measurement","score":0.44235676527023315},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.43886059522628784},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4334869980812073},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.381704717874527},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.15183353424072266},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.113740473985672},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08517539501190186}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.755418598651886},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6816744208335876},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6627597808837891},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.6483784914016724},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.5517586469650269},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5510717034339905},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5458112955093384},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5376862287521362},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4965868592262268},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.4484194219112396},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.44235676527023315},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.43886059522628784},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4334869980812073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.381704717874527},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.15183353424072266},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.113740473985672},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08517539501190186},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096848","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096848","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1589137271","https://openalex.org/W1936725236","https://openalex.org/W2055611715","https://openalex.org/W2090716422","https://openalex.org/W2117671523","https://openalex.org/W2150769028","https://openalex.org/W2159202424","https://openalex.org/W2219249508","https://openalex.org/W2407127962","https://openalex.org/W2513927927","https://openalex.org/W2604341542","https://openalex.org/W2612434969","https://openalex.org/W2726515241","https://openalex.org/W2746742816","https://openalex.org/W2890964092","https://openalex.org/W2982037672","https://openalex.org/W3015231328","https://openalex.org/W3024869864","https://openalex.org/W3025783616","https://openalex.org/W3196650842","https://openalex.org/W4206633762","https://openalex.org/W4224927738","https://openalex.org/W4283703262","https://openalex.org/W4321780088","https://openalex.org/W6635152626"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2162158162","https://openalex.org/W4235705411","https://openalex.org/W1521299571","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W2175373321","https://openalex.org/W2134501921","https://openalex.org/W2125642021","https://openalex.org/W2144470400"],"abstract_inverted_index":{"Automatic":[0],"speaker":[1,39,51,88,106],"verification":[2,107],"(ASV)":[3],"suffers":[4],"from":[5],"performance":[6,103],"degradation":[7],"in":[8,109],"noisy":[9,25,91,113],"environments.":[10,92],"To":[11],"solve":[12],"this":[13],"problem,":[14],"we":[15],"propose":[16],"the":[17,23,34,38,60,64,69,74,84,98,102,105],"noise-disentanglement":[18],"metric":[19],"learning":[20],"to":[21,46,55,67,82],"reduce":[22],"speaker-irrelevant":[24],"components":[26],"and":[27,41,59,112],"build":[28],"a":[29],"noise-invariant":[30],"embedding":[31],"space.":[32],"Specifically,":[33],"disentanglement":[35],"module,":[36,43],"including":[37],"encoder":[40,52],"re-construction":[42],"is":[44,53,80],"dedicated":[45],"decoupling":[47],"speech":[48],"signals.":[49],"The":[50],"used":[54],"disentangle":[56],"speaker-related":[57],"components,":[58],"reconstruction":[61],"module":[62],"increases":[63],"model\u2019s":[65],"ability":[66],"constrain":[68],"noise":[70],"information":[71],"by":[72],"re-constructing":[73],"signal.":[75],"In":[76],"addition,":[77],"distribution":[78],"optimization":[79],"introduced":[81],"supervise":[83],"spatial":[85],"structure":[86],"of":[87,104],"embeddings":[89],"under":[90],"Experiments":[93],"on":[94],"Vox-Celeb1":[95],"indicate":[96],"that":[97],"proposed":[99],"method":[100],"improves":[101],"system":[108],"both":[110],"clean":[111],"conditions.":[114]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
