{"id":"https://openalex.org/W4388579618","doi":"https://doi.org/10.1109/taslp.2023.3331949","title":"Self-Supervised Learning With Cluster-Aware-DINO for High-Performance Robust Speaker Verification","display_name":"Self-Supervised Learning With Cluster-Aware-DINO for High-Performance Robust Speaker Verification","publication_year":2023,"publication_date":"2023-11-10","ids":{"openalex":"https://openalex.org/W4388579618","doi":"https://doi.org/10.1109/taslp.2023.3331949"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3331949","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3331949","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100690517","display_name":"Bing Han","orcid":"https://orcid.org/0000-0002-6319-6755"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bing Han","raw_affiliation_strings":["Department of Computer Science and Engineering &amp; MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering &amp; MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101416769","display_name":"Zhengyang Chen","orcid":"https://orcid.org/0000-0003-1293-8146"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengyang Chen","raw_affiliation_strings":["Department of Computer Science and Engineering &amp; MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering &amp; MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["Department of Computer Science and Engineering &amp; MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering &amp; MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100690517"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":5.5614,"has_fulltext":false,"cited_by_count":32,"citation_normalized_percentile":{"value":0.96797862,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"32","issue":null,"first_page":"529","last_page":"541"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8430677652359009},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6169434785842896},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5890278816223145},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.572388768196106},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5042909383773804},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4721165895462036},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4310429096221924},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3720461130142212}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8430677652359009},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6169434785842896},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5890278816223145},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.572388768196106},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5042909383773804},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4721165895462036},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4310429096221924},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3720461130142212},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2023.3331949","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3331949","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W1821462560","https://openalex.org/W2041823554","https://openalex.org/W2046056978","https://openalex.org/W2114925438","https://openalex.org/W2150769028","https://openalex.org/W2194775991","https://openalex.org/W2219249508","https://openalex.org/W2341528187","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2747165665","https://openalex.org/W2752782242","https://openalex.org/W2766245080","https://openalex.org/W2808631503","https://openalex.org/W2883725317","https://openalex.org/W2889519245","https://openalex.org/W2890964092","https://openalex.org/W2928165649","https://openalex.org/W2969985801","https://openalex.org/W2972705840","https://openalex.org/W2972986505","https://openalex.org/W2998702515","https://openalex.org/W3010893827","https://openalex.org/W3010925296","https://openalex.org/W3013020904","https://openalex.org/W3015734344","https://openalex.org/W3023351797","https://openalex.org/W3024869864","https://openalex.org/W3034187833","https://openalex.org/W3035524453","https://openalex.org/W3101998545","https://openalex.org/W3111481301","https://openalex.org/W3135006803","https://openalex.org/W3159481202","https://openalex.org/W3160397447","https://openalex.org/W3161606033","https://openalex.org/W3163187953","https://openalex.org/W3196649213","https://openalex.org/W3197627775","https://openalex.org/W3198275944","https://openalex.org/W3202801947","https://openalex.org/W3205635414","https://openalex.org/W3206189675","https://openalex.org/W3209059054","https://openalex.org/W4220739185","https://openalex.org/W4221167533","https://openalex.org/W4224916451","https://openalex.org/W4224924217","https://openalex.org/W4226254439","https://openalex.org/W4286981691","https://openalex.org/W4287635082","https://openalex.org/W4288091954","https://openalex.org/W4290712827","https://openalex.org/W4290995108","https://openalex.org/W4296068770","https://openalex.org/W4297841416","https://openalex.org/W4297841773","https://openalex.org/W4385823202","https://openalex.org/W4394665180","https://openalex.org/W6638523607","https://openalex.org/W6688816777","https://openalex.org/W6762161020","https://openalex.org/W6769178842","https://openalex.org/W6773005947","https://openalex.org/W6774314701","https://openalex.org/W6779230768","https://openalex.org/W6779997284","https://openalex.org/W6780218876","https://openalex.org/W6784330115","https://openalex.org/W6784400926","https://openalex.org/W6787439801","https://openalex.org/W6788329692","https://openalex.org/W6800698692","https://openalex.org/W6801362334","https://openalex.org/W6809668307","https://openalex.org/W6864750640"],"related_works":["https://openalex.org/W2366107444","https://openalex.org/W4388145910","https://openalex.org/W1976205134","https://openalex.org/W2381570729","https://openalex.org/W4248336175","https://openalex.org/W3009369890","https://openalex.org/W2031260042","https://openalex.org/W2391445434","https://openalex.org/W4312490297","https://openalex.org/W2062212388"],"abstract_inverted_index":{"The":[0,196],"automatic":[1],"speaker":[2,37,75,210],"verification":[3,38,76,211],"task":[4],"has":[5,39],"achieved":[6],"great":[7],"success":[8],"using":[9,82,200,260],"deep":[10],"learning":[11,63,134,247],"approaches":[12],"with":[13,78,98,233,253],"a":[14,21,41,58,73,119,138],"large-scale,":[15],"manually":[16],"annotated":[17],"dataset.":[18,204],"However,":[19],"collecting":[20],"significant":[22],"amount":[23],"of":[24,43,90,129,140,148,220],"well-labeled":[25],"data":[26],"for":[27,123,153],"system":[28,77,155,248],"building":[29],"is":[30,151],"very":[31],"difficult":[32],"and":[33,60,164,223,228,238],"expensive.":[34],"Recently,":[35],"self-supervised":[36,62,209,246],"attracted":[40],"lot":[42],"interest":[44],"due":[45,136],"to":[46,125,137,160,169,185,191,206],"its":[47],"no":[48,99],"dependency":[49],"on":[50,66,187,225],"labeled":[51,84],"data.":[52,85,130,263],"In":[53,131],"this":[54],"article,":[55],"we":[56,94,116,179],"propose":[57,161],"novel":[59],"advanced":[61],"framework":[64,102],"based":[65],"our":[67,213],"prior":[68],"work,":[69],"which":[70,107],"can":[71,108],"construct":[72],"powerful":[74],"high":[79],"performance":[80,172],"without":[81,111,259],"any":[83,261],"To":[86],"avoid":[87],"the":[88,96,104,127,132,146,154,171,181,188,194,201,207,243,254],"impact":[89],"false":[91],"negative":[92,113],"pairs,":[93],"adopt":[95],"self-distillation":[97],"labels":[100,142,150],"(DINO)":[101],"as":[103],"initial":[105],"model,":[106],"be":[109],"trained":[110],"exploiting":[112],"pairs.":[114],"Then,":[115],"further":[117,192],"introduce":[118],"cluster-aware":[120],"training":[121],"strategy":[122],"DINO":[124],"improve":[126,193],"diversity":[128],"iterative":[133],"stage,":[135],"mass":[139],"unreliable":[141,176],"from":[143,183],"unsupervised":[144],"clustering,":[145],"quality":[147],"pseudo":[149],"important":[152],"performance.":[156,195],"This":[157],"motivates":[158],"us":[159],"dynamic":[162],"loss-gate":[163],"label":[165],"correction":[166],"(DLG-LC)":[167],"methods":[168],"alleviate":[170],"degradation":[173],"caused":[174],"by":[175],"labels.":[177],"Furthermore,":[178],"extend":[180],"DLG-LC":[182],"single-modality":[184],"multi-modality":[186],"audio-visual":[189],"dataset":[190],"experiments":[197],"were":[198],"conducted":[199],"widely-used":[202],"Voxceleb":[203],"Compared":[205],"best-known":[208],"system,":[212,257],"proposed":[214,245],"method":[215],"achieve":[216],"relative":[217],"EER":[218],"improvement":[219],"22.17%,":[221],"27.94%":[222],"25.56%":[224],"Vox-O,":[226],"Vox-E":[227],"Vox-H":[229],"test":[230],"sets,":[231],"even":[232,249],"fewer":[234],"iterations,":[235],"smaller":[236],"models,":[237],"simpler":[239],"clustering":[240],"methods.":[241],"Importantly,":[242],"newly":[244],"achieves":[250],"comparable":[251],"results":[252],"fully":[255],"supervised":[256],"but":[258],"human-labeled":[262]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":3}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
