{"id":"https://openalex.org/W3206189675","doi":"https://doi.org/10.1109/icassp43922.2022.9747814","title":"Large-Scale Self-Supervised Speech Representation Learning for Automatic Speaker Verification","display_name":"Large-Scale Self-Supervised Speech Representation Learning for Automatic Speaker Verification","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3206189675","doi":"https://doi.org/10.1109/icassp43922.2022.9747814","mag":"3206189675"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747814","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747814","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101416769","display_name":"Zhengyang Chen","orcid":"https://orcid.org/0000-0003-1293-8146"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]},{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CA","CN","FI"],"is_corresponding":true,"raw_author_name":"Zhengyang Chen","raw_affiliation_strings":["Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab,Department of Computer Science and Engineering","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab, Shanghai Jiao Tong University","Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab, Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079533447","display_name":"Sanyuan Chen","orcid":"https://orcid.org/0000-0002-3082-6052"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Sanyuan Chen","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101709477","display_name":"Yu Wu","orcid":"https://orcid.org/0000-0002-5715-3011"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Yu Wu","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100342006","display_name":"Yao Qian","orcid":"https://orcid.org/0000-0003-1855-9630"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA","FI"],"is_corresponding":false,"raw_author_name":"Yao Qian","raw_affiliation_strings":["Microsoft Corporation","Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101863385","display_name":"Chengyi Wang","orcid":"https://orcid.org/0000-0002-6780-9299"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Chengyi Wang","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101635405","display_name":"Shujie Liu","orcid":"https://orcid.org/0009-0008-0785-8882"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Shujie Liu","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA","CN","FI"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["Microsoft Corporation","Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab,Department of Computer Science and Engineering","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab, Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Shanghai Jiao Tong University,MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, X-LANCE Lab, Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089195158","display_name":"Michael Zeng","orcid":"https://orcid.org/0000-0001-5302-5883"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Michael Zeng","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101416769"],"corresponding_institution_ids":["https://openalex.org/I183067930","https://openalex.org/I4210105678","https://openalex.org/I4210164862"],"apc_list":null,"apc_paid":null,"fwci":9.597,"has_fulltext":false,"cited_by_count":101,"citation_normalized_percentile":{"value":0.98712242,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"6147","last_page":"6151"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.8193928599357605},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7929307222366333},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6566754579544067},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6413291096687317},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6048580408096313},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6026979684829712},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5349098443984985},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4596068859100342},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.44538867473602295},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4393406808376312},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.435455322265625},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3490244746208191},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08473843336105347}],"concepts":[{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.8193928599357605},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7929307222366333},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6566754579544067},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6413291096687317},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6048580408096313},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6026979684829712},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5349098443984985},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4596068859100342},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.44538867473602295},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4393406808376312},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.435455322265625},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3490244746208191},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08473843336105347},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747814","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747814","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W125553504","https://openalex.org/W1006777433","https://openalex.org/W2157161740","https://openalex.org/W2219249508","https://openalex.org/W2517995648","https://openalex.org/W2590129515","https://openalex.org/W2726515241","https://openalex.org/W2747238065","https://openalex.org/W2752782242","https://openalex.org/W2794506738","https://openalex.org/W2808631503","https://openalex.org/W2888968865","https://openalex.org/W2889519245","https://openalex.org/W2890964092","https://openalex.org/W2896457183","https://openalex.org/W2928165649","https://openalex.org/W2937142395","https://openalex.org/W2948947170","https://openalex.org/W2962788625","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963420686","https://openalex.org/W2964052309","https://openalex.org/W2969985801","https://openalex.org/W2972909277","https://openalex.org/W2981461916","https://openalex.org/W3010925296","https://openalex.org/W3024869864","https://openalex.org/W3036601975","https://openalex.org/W3094374485","https://openalex.org/W3099782249","https://openalex.org/W3157923770","https://openalex.org/W3160397447","https://openalex.org/W3161606033","https://openalex.org/W3163187953","https://openalex.org/W3169320628","https://openalex.org/W3179803166","https://openalex.org/W3197580070","https://openalex.org/W3197642003","https://openalex.org/W3198275944","https://openalex.org/W3198698812","https://openalex.org/W3206252155","https://openalex.org/W3209059054","https://openalex.org/W4226380987","https://openalex.org/W4288091954","https://openalex.org/W4385245566","https://openalex.org/W6688816777","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6761176859","https://openalex.org/W6769178842","https://openalex.org/W6780218876","https://openalex.org/W6801723603","https://openalex.org/W6803394801"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W4313854686","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2029134149","https://openalex.org/W2499802997","https://openalex.org/W2112059504","https://openalex.org/W1960256358"],"abstract_inverted_index":{"The":[0,66,91],"speech":[1,39],"representations":[2,40,67],"learned":[3,41],"from":[4,14,68],"large-scale":[5],"unlabeled":[6],"data":[7],"have":[8],"shown":[9],"better":[10],"generalizability":[11],"than":[12],"those":[13],"supervised":[15],"learning":[16],"and":[17,46,82,121,151],"thus":[18],"attract":[19],"a":[20,55,63,108],"lot":[21],"of":[22,38,72,132,166],"interest":[23],"to":[24,106,148],"be":[25],"applied":[26],"for":[27,48,112],"various":[28],"downstream":[29,64],"tasks.":[30],"In":[31],"this":[32],"paper,":[33],"we":[34],"explore":[35],"the":[36,73,86,99,128,136,146,154,162,167,175],"limits":[37],"by":[42],"different":[43],"self-supervised":[44],"objectives":[45],"datasets":[47],"automatic":[49],"speaker":[50],"verification":[51],"(ASV),":[52],"especially":[53],"with":[54,79,139],"well-recognized":[56],"SOTA":[57],"ASV":[58],"model,":[59],"ECAPA-TDNN":[60,87],"[1],":[61],"as":[62,88],"model.":[65],"all":[69],"hidden":[70],"layers":[71],"pre-trained":[74,141],"model":[75],"are":[76],"firstly":[77],"averaged":[78],"learnable":[80],"weights":[81],"then":[83],"fed":[84],"into":[85],"input":[89],"features.":[90],"experimental":[92],"results":[93],"on":[94,127,174],"Voxceleb":[95],"dataset":[96],"show":[97],"that":[98],"weighted":[100],"average":[101],"representation":[102],"is":[103],"significantly":[104],"superior":[105],"FBank,":[107],"conventional":[109],"handcrafted":[110],"feature":[111],"ASV.":[113],"Our":[114],"best":[115,159],"single":[116],"system":[117,138,160,164],"achieves":[118],"0.537%,":[119],"0.569%,":[120],"1.180%":[122],"equal":[123],"error":[124],"rate":[125],"(EER)":[126],"three":[129,140,155],"official":[130],"trials":[131],"VoxCeleb1,":[133],"separately.":[134],"Accordingly,":[135],"ensemble":[137],"models":[142],"can":[143],"further":[144],"improve":[145],"EER":[147],"0.479%,":[149],"0.536%":[150],"1.023%.":[152],"Among":[153],"evaluation":[156],"trials,":[157],"our":[158],"outperforms":[161],"winner":[163],"[2]":[165],"VoxCeleb":[168],"Speaker":[169],"Recognition":[170],"Challenge":[171],"2021":[172],"(VoxSRC2021)":[173],"VoxCeleb1-E":[176],"trial.":[177]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":33},{"year":2024,"cited_by_count":26},{"year":2023,"cited_by_count":31},{"year":2022,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
