{"id":"https://openalex.org/W4405709573","doi":"https://doi.org/10.1109/iscslp63861.2024.10800669","title":"Online Neural Speaker Diarization with Spectral Clustering for Meeting Scenarios","display_name":"Online Neural Speaker Diarization with Spectral Clustering for Meeting Scenarios","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4405709573","doi":"https://doi.org/10.1109/iscslp63861.2024.10800669"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp63861.2024.10800669","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800669","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040565433","display_name":"Tianyou Cheng","orcid":"https://orcid.org/0000-0002-7012-520X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianyou Cheng","raw_affiliation_strings":["University of Science and Technology of China,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018657729","display_name":"Maokui He","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Maokui He","raw_affiliation_strings":["University of Science and Technology of China,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113145216","display_name":"Gaobin Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gaobin Yang","raw_affiliation_strings":["University of Science and Technology of China,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064699619","display_name":"Shutong Niu","orcid":"https://orcid.org/0000-0003-0315-1042"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shutong Niu","raw_affiliation_strings":["University of Science and Technology of China,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108843321","display_name":"Yanqiang Lei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanqiang Lei","raw_affiliation_strings":["CVTE research"],"affiliations":[{"raw_affiliation_string":"CVTE research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045371931","display_name":"Limei Peng","orcid":"https://orcid.org/0000-0001-9984-9861"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Limei Peng","raw_affiliation_strings":["CVTE research"],"affiliations":[{"raw_affiliation_string":"CVTE research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101081618","display_name":"Jun Du","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Du","raw_affiliation_strings":["University of Science and Technology of China,Hefei"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5040565433"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.3316,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.6902948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"373","last_page":"377"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9406999945640564,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.92330002784729,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.8800138235092163},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.711925745010376},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6589781641960144},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5888574719429016},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.49842119216918945},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4178740680217743},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39760327339172363}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.8800138235092163},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.711925745010376},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6589781641960144},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5888574719429016},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.49842119216918945},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4178740680217743},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39760327339172363}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp63861.2024.10800669","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800669","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W9688279","https://openalex.org/W67702164","https://openalex.org/W1511733513","https://openalex.org/W1608667326","https://openalex.org/W1965819578","https://openalex.org/W2038101708","https://openalex.org/W2117678320","https://openalex.org/W2150769028","https://openalex.org/W2159591770","https://openalex.org/W2403186097","https://openalex.org/W2638067502","https://openalex.org/W2890964092","https://openalex.org/W2896538040","https://openalex.org/W2952752702","https://openalex.org/W2962788625","https://openalex.org/W2963470929","https://openalex.org/W2963745998","https://openalex.org/W2972949456","https://openalex.org/W3008357631","https://openalex.org/W3015308613","https://openalex.org/W3025260599","https://openalex.org/W3097777922","https://openalex.org/W3162770427","https://openalex.org/W3178462146","https://openalex.org/W3207834622","https://openalex.org/W3212886388","https://openalex.org/W4283069218","https://openalex.org/W4297841423","https://openalex.org/W4312310839","https://openalex.org/W4319586574","https://openalex.org/W4362683557","https://openalex.org/W4392904174","https://openalex.org/W6857391484"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W2249138175","https://openalex.org/W4389984014","https://openalex.org/W2144208207","https://openalex.org/W1509309911","https://openalex.org/W1599425004","https://openalex.org/W2118860825","https://openalex.org/W2096510939"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"an":[3],"online":[4,27,49,128],"speaker":[5,16,83,87,91],"diarization":[6,17,84],"system":[7,112,129],"based":[8],"on":[9,113],"spectral":[10,28,102],"clustering":[11,29,50,71,103],"algorithm":[12],"and":[13,45,57,89,117,124],"a":[14,31],"neural":[15,82],"model.":[18],"To":[19],"efficiently":[20],"process":[21],"long-time":[22],"recordings,":[23],"we":[24,96,109],"perform":[25],"the":[26,53,58,76,79,115,121],"in":[30,78],"block-wise":[32],"manner.":[33],"We":[34],"use":[35,98],"two":[36],"core":[37],"samples,":[38,62],"one":[39],"of":[40,120],"which":[41,63],"contains":[42,60],"fewer":[43],"samples":[44,77],"is":[46],"used":[47,73],"for":[48],"to":[51,67,74,104],"meet":[52],"low":[54],"latency":[55],"requirement,":[56],"other":[59],"more":[61,69],"are":[64],"clustered":[65],"of-fline":[66],"obtain":[68,105],"precise":[70],"results":[72,99,132],"update":[75],"former.":[80],"The":[81],"model":[85],"requires":[86],"masks":[88],"target":[90],"embeddings":[92],"as":[93],"inputs,":[94],"so":[95],"can":[97],"obtained":[100],"from":[101],"these":[106],"components.":[107],"Additionally,":[108],"evaluate":[110],"our":[111,127],"both":[114],"evaluation":[116],"test":[118],"sets":[119],"AliMeeting":[122],"dataset":[123],"find":[125],"that":[126],"achieves":[130],"better":[131],"than":[133],"previous":[134],"work.":[135]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
