{"id":"https://openalex.org/W4225096077","doi":"https://doi.org/10.21437/interspeech.2022-10019","title":"Why does Self-Supervised Learning for Speech Recognition Benefit Speaker Recognition?","display_name":"Why does Self-Supervised Learning for Speech Recognition Benefit Speaker Recognition?","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4225096077","doi":"https://doi.org/10.21437/interspeech.2022-10019"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-10019","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10019","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079533447","display_name":"Sanyuan Chen","orcid":"https://orcid.org/0000-0002-3082-6052"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sanyuan Chen","raw_affiliation_strings":["Harbin Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100324098","display_name":"Yu Wu","orcid":"https://orcid.org/0000-0002-1680-8253"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Yu Wu","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101863385","display_name":"Chengyi Wang","orcid":"https://orcid.org/0000-0002-6780-9299"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Chengyi Wang","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101635405","display_name":"Shujie Liu","orcid":"https://orcid.org/0009-0008-0785-8882"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Shujie Liu","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100345092","display_name":"Zhuo Chen","orcid":"https://orcid.org/0000-0002-9011-7928"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuo Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062250845","display_name":"Peidong Wang","orcid":"https://orcid.org/0000-0002-7042-0209"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Peidong Wang","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412456","display_name":"Gang Liu","orcid":"https://orcid.org/0000-0002-6439-3195"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Gang Liu","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101674460","display_name":"Jian Wu","orcid":"https://orcid.org/0000-0002-3101-7011"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jian Wu","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072540013","display_name":"Xiangzhan Yu","orcid":"https://orcid.org/0000-0002-1183-2844"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["CN","FI"],"is_corresponding":false,"raw_author_name":"Xiangzhan Yu","raw_affiliation_strings":["Microsoft Corporation","Harbin Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Harbin Institute of Technology, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014662947","display_name":"Furu Wei","orcid":"https://orcid.org/0000-0002-7810-5852"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Furu Wei","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5079533447"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":3.1314,"has_fulltext":false,"cited_by_count":32,"citation_normalized_percentile":{"value":0.93104362,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3699","last_page":"3703"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7885206937789917},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7758883237838745},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.7403393983840942},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3754374384880066}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7885206937789917},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7758883237838745},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.7403393983840942},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3754374384880066}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-10019","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10019","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2594633041","https://openalex.org/W2747874407","https://openalex.org/W2777662428","https://openalex.org/W2928165649","https://openalex.org/W2936774411","https://openalex.org/W2963799213","https://openalex.org/W2969985801","https://openalex.org/W2971033911","https://openalex.org/W2981087920","https://openalex.org/W2995181338","https://openalex.org/W3024869864","https://openalex.org/W3036601975","https://openalex.org/W3094374485","https://openalex.org/W3119308075","https://openalex.org/W3142516134","https://openalex.org/W3169320628","https://openalex.org/W3197580070","https://openalex.org/W3198698812","https://openalex.org/W3204696009","https://openalex.org/W3209984917","https://openalex.org/W4221161761","https://openalex.org/W4226221575","https://openalex.org/W4285666836","https://openalex.org/W4287120025","https://openalex.org/W4306169301"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W4313854686","https://openalex.org/W2499802997","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2029134149","https://openalex.org/W2368768466","https://openalex.org/W2757081366"],"abstract_inverted_index":{"Recently,":[0],"self-supervised":[1,100],"learning":[2,33,101],"(SSL)":[3],"has":[4,80],"demonstrated":[5],"strong":[6],"performance":[7],"in":[8],"speaker":[9,103],"recognition,":[10],"even":[11],"if":[12],"the":[13,29,50,55,77,86,97],"pretraining":[14],"objective":[15],"is":[16,62],"designed":[17,45],"for":[18,102],"speech":[19,68],"recognition.In":[20],"this":[21],"paper,":[22],"we":[23],"study":[24],"which":[25],"factor":[26],"leads":[27],"to":[28,59,95],"success":[30],"of":[31,43,57,66,99],"selfsupervised":[32],"on":[34,49],"speaker-related":[35],"tasks,":[36],"e.g.speaker":[37],"verification":[38],"(SV),":[39],"through":[40],"a":[41,64,81],"series":[42],"carefully":[44],"experiments.Our":[46],"empirical":[47],"results":[48],"Voxceleb-1":[51],"dataset":[52],"suggest":[53],"that":[54],"benefit":[56],"SSL":[58,78],"SV":[60],"task":[61],"from":[63],"combination":[65],"mask":[67],"prediction":[69],"loss,":[70],"data":[71],"scale,":[72],"and":[73,91],"model":[74],"size,":[75],"while":[76],"quantizer":[79],"minor":[82],"impact.We":[83],"further":[84],"employ":[85],"integrated":[87],"gradients":[88],"attribution":[89],"method":[90],"loss":[92],"landscape":[93],"visualization":[94],"understand":[96],"effectiveness":[98],"recognition":[104],"performance.":[105]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":15}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
