{"id":"https://openalex.org/W4391454455","doi":"https://doi.org/10.1109/taslp.2024.3357033","title":"Interrelate Training and Clustering for Online Speaker Diarization","display_name":"Interrelate Training and Clustering for Online Speaker Diarization","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391454455","doi":"https://doi.org/10.1109/taslp.2024.3357033"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3357033","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3357033","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100405158","display_name":"Yifan Chen","orcid":"https://orcid.org/0009-0002-6946-7699"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yifan Chen","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072887461","display_name":"Gaofeng Cheng","orcid":"https://orcid.org/0000-0002-2102-6061"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gaofeng Cheng","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035774762","display_name":"Runyan Yang","orcid":"https://orcid.org/0000-0003-3466-6882"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runyan Yang","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036446253","display_name":"Pengyuan Zhang","orcid":"https://orcid.org/0000-0001-6838-5160"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengyuan Zhang","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100425112","display_name":"Yonghong Yan","orcid":"https://orcid.org/0000-0001-6907-5770"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yonghong Yan","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100405158"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210099069","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":1.0193,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.7834724,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"32","issue":null,"first_page":"1352","last_page":"1364"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8811635971069336},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6861782670021057},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6793918609619141},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6040157675743103},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.510291576385498},{"id":"https://openalex.org/keywords/complete-linkage-clustering","display_name":"Complete-linkage clustering","score":0.5066476464271545},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41574782133102417},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3993874490261078},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3724291920661926},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.2993435859680176},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.21858304738998413},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.18879908323287964},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08010026812553406}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8811635971069336},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6861782670021057},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6793918609619141},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6040157675743103},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.510291576385498},{"id":"https://openalex.org/C23822008","wikidata":"https://www.wikidata.org/wiki/Q5156437","display_name":"Complete-linkage clustering","level":5,"score":0.5066476464271545},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41574782133102417},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3993874490261078},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3724291920661926},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.2993435859680176},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.21858304738998413},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.18879908323287964},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08010026812553406}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3357033","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3357033","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322847","display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","ror":"https://ror.org/031141b54"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W123007118","https://openalex.org/W1981773323","https://openalex.org/W1998871699","https://openalex.org/W2016973429","https://openalex.org/W2045964207","https://openalex.org/W2046056978","https://openalex.org/W2083955451","https://openalex.org/W2088340225","https://openalex.org/W2096733369","https://openalex.org/W2141461755","https://openalex.org/W2150769028","https://openalex.org/W2153773386","https://openalex.org/W2159591770","https://openalex.org/W2187089797","https://openalex.org/W2219249508","https://openalex.org/W2222512263","https://openalex.org/W2598634450","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2784163702","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2896538040","https://openalex.org/W2969985801","https://openalex.org/W2972680151","https://openalex.org/W2972891548","https://openalex.org/W2980536810","https://openalex.org/W2989790778","https://openalex.org/W2997419692","https://openalex.org/W3008357631","https://openalex.org/W3015598461","https://openalex.org/W3016031604","https://openalex.org/W3025260599","https://openalex.org/W3095212884","https://openalex.org/W3097022478","https://openalex.org/W3103152812","https://openalex.org/W3105031100","https://openalex.org/W3112617506","https://openalex.org/W3128434617","https://openalex.org/W3160044950","https://openalex.org/W3162770427","https://openalex.org/W3171540622","https://openalex.org/W3178462146","https://openalex.org/W3196117288","https://openalex.org/W3203417382","https://openalex.org/W3205635414","https://openalex.org/W3208240190","https://openalex.org/W3212886388","https://openalex.org/W4206908380","https://openalex.org/W4230375458","https://openalex.org/W4245490569","https://openalex.org/W4283717211","https://openalex.org/W4286981691","https://openalex.org/W4313316127","https://openalex.org/W4319586574","https://openalex.org/W6620707391","https://openalex.org/W6631362777","https://openalex.org/W6675751002","https://openalex.org/W6684578312","https://openalex.org/W6688816777","https://openalex.org/W6735531217","https://openalex.org/W6764258251","https://openalex.org/W6767874007","https://openalex.org/W6767942481","https://openalex.org/W6769209353","https://openalex.org/W6784400926","https://openalex.org/W6787206693","https://openalex.org/W6802591457"],"related_works":["https://openalex.org/W3080491161","https://openalex.org/W159467828","https://openalex.org/W2976114323","https://openalex.org/W2977447808","https://openalex.org/W2112801327","https://openalex.org/W2912011375","https://openalex.org/W2056194206","https://openalex.org/W2085801246","https://openalex.org/W2293953271","https://openalex.org/W4240147959"],"abstract_inverted_index":{"In":[0,23],"clustering-based":[1],"speaker":[2,36,134,179],"diarization":[3],"systems,":[4],"the":[5,28,31,40,46,64,72,79,87,103,108,114,130,138,150,153,163,173,177,186,194,213,240,271,290,293,296],"embedding":[6,141,154,180],"clusters":[7,82,111],"for":[8,20,125,156,222],"distinctive":[9],"speakers":[10,158],"exhibit":[11],"wide":[12],"variability":[13,132],"in":[14,55,91,113,159,231,236,249,257],"size":[15,151],"and":[16,83,182,219],"density,":[17],"posing":[18],"difficulty":[19],"clustering":[21,52,105,139,188,199,277,300],"accuracy.":[22],"spite":[24],"of":[25,30,39,63,75,81,133,152,166,176,243,295],"this,":[26],"with":[27,215,228,282],"assistance":[29],"overall":[32],"distance":[33,61,164,174,195,217,241],"relationships":[34,62,165,218,242],"among":[35],"embeddings,":[37,135],"most":[38],"embeddings":[41,65,88,109,205,214,230,244],"can":[42,66,171,262],"be":[43,68],"grouped":[44],"to":[45,71,146,161,185,238],"correct":[47],"cluster":[48],"by":[49],"sophisticated":[50],"offline":[51,297],"algorithms.":[53],"However,":[54],"online":[56,93,104,126,187,198,233,252,267,276],"scenarios,":[57],"such":[58],"a":[59,122,207],"complete":[60],"not":[67],"obtained":[69],"due":[70],"incremental":[73],"arrival":[74],"embeddings.":[76,167],"Consequently,":[77],"determining":[78],"number":[80],"then":[84,202,220],"correctly":[85],"grouping":[86],"become":[89],"challenging":[90],"an":[92,232,251],"fashion.":[94,234],"Furthermore,":[95],"errors":[96],"would":[97],"accumulate":[98],"quickly":[99],"over":[100],"time":[101],"if":[102],"algorithm":[106,145,170,201],"assigns":[107],"into":[110],"erroneously":[112],"beginning.":[115],"To":[116,128],"address":[117],"these":[118],"problems,":[119],"we":[120,136],"designed":[121],"novel":[123],"framework":[124],"clustering.":[127,268],"reduce":[129],"high":[131],"proposed":[137,275],"guided":[140,197],"extractor":[142],"training":[143],"(CGEET)":[144],"encourage":[147],"similarity":[148],"between":[149],"space":[155,181],"different":[157],"attempt":[160],"simplify":[162],"The":[168],"CGEET":[169],"grasp":[172],"information":[175],"entire":[178],"provide":[183],"it":[184],"algorithm.":[189],"With":[190],"this":[191],"preliminary":[192],"information,":[193],"thresholds":[196],"(DTGOC)":[200],"processes":[203],"incoming":[204],"using":[206],"divide-and-conquer":[208],"approach.":[209],"It":[210],"first":[211],"handles":[212],"explicit":[216],"searches":[221],"possible":[223],"path":[224],"combination":[225],"they":[226],"have":[227],"remaining":[229],"Moreover,":[235],"order":[237],"utilize":[239],"that":[245],"are":[246],"far":[247],"apart":[248],"time,":[250],"re-clustering":[253],"strategy":[254],"is":[255,302],"incorporated":[256],"our":[258,274],"DTGOC":[259],"algorithm,":[260],"which":[261],"alleviate":[263],"error":[264],"accumulation":[265],"during":[266],"By":[269],"implementing":[270],"above":[272],"innovations,":[273],"system":[278,301],"achieves":[279],"14.00%":[280],"DER":[281,294],"collar":[283],"0.25":[284],"at":[285],"2.5":[286],"s":[287],"latency":[288],"on":[289],"AISHELL-4,":[291],"while":[292],"agglomerative":[298],"hierarchical":[299],"14.54%.":[303]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
