{"id":"https://openalex.org/W4225661121","doi":"https://doi.org/10.1109/asru51503.2021.9687875","title":"Towards Neural Diarization for Unlimited Numbers of Speakers Using Global and Local Attractors","display_name":"Towards Neural Diarization for Unlimited Numbers of Speakers Using Global and Local Attractors","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4225661121","doi":"https://doi.org/10.1109/asru51503.2021.9687875"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9687875","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687875","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026324656","display_name":"Shota Horiguchi","orcid":"https://orcid.org/0000-0002-3166-4956"},"institutions":[{"id":"https://openalex.org/I65143321","display_name":"Hitachi (Japan)","ror":"https://ror.org/02exqgm79","country_code":"JP","type":"company","lineage":["https://openalex.org/I65143321"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Shota Horiguchi","raw_affiliation_strings":["Hitachi, Ltd.,Japan","Hitachi, Ltd., Japan"],"affiliations":[{"raw_affiliation_string":"Hitachi, Ltd.,Japan","institution_ids":["https://openalex.org/I65143321"]},{"raw_affiliation_string":"Hitachi, Ltd., Japan","institution_ids":["https://openalex.org/I65143321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University,USA","Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059858850","display_name":"Leibny Paola Garcia","orcid":"https://orcid.org/0000-0002-7449-5726"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Paola Garcia","raw_affiliation_strings":["Johns Hopkins University,USA","Johns Hopkins University, USA"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University,USA","institution_ids":["https://openalex.org/I145311948"]},{"raw_affiliation_string":"Johns Hopkins University, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101012633","display_name":"Yawen Xue","orcid":null},"institutions":[{"id":"https://openalex.org/I65143321","display_name":"Hitachi (Japan)","ror":"https://ror.org/02exqgm79","country_code":"JP","type":"company","lineage":["https://openalex.org/I65143321"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yawen Xue","raw_affiliation_strings":["Hitachi, Ltd.,Japan","Hitachi, Ltd., Japan"],"affiliations":[{"raw_affiliation_string":"Hitachi, Ltd.,Japan","institution_ids":["https://openalex.org/I65143321"]},{"raw_affiliation_string":"Hitachi, Ltd., Japan","institution_ids":["https://openalex.org/I65143321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087545401","display_name":"Yuki Takashima","orcid":"https://orcid.org/0000-0001-8489-9487"},"institutions":[{"id":"https://openalex.org/I65143321","display_name":"Hitachi (Japan)","ror":"https://ror.org/02exqgm79","country_code":"JP","type":"company","lineage":["https://openalex.org/I65143321"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuki Takashima","raw_affiliation_strings":["Hitachi, Ltd.,Japan","Hitachi, Ltd., Japan"],"affiliations":[{"raw_affiliation_string":"Hitachi, Ltd.,Japan","institution_ids":["https://openalex.org/I65143321"]},{"raw_affiliation_string":"Hitachi, Ltd., Japan","institution_ids":["https://openalex.org/I65143321"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052394367","display_name":"Yohei Kawaguchi","orcid":"https://orcid.org/0000-0002-2329-5441"},"institutions":[{"id":"https://openalex.org/I65143321","display_name":"Hitachi (Japan)","ror":"https://ror.org/02exqgm79","country_code":"JP","type":"company","lineage":["https://openalex.org/I65143321"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yohei Kawaguchi","raw_affiliation_strings":["Hitachi, Ltd.,Japan","Hitachi, Ltd., Japan"],"affiliations":[{"raw_affiliation_string":"Hitachi, Ltd.,Japan","institution_ids":["https://openalex.org/I65143321"]},{"raw_affiliation_string":"Hitachi, Ltd., Japan","institution_ids":["https://openalex.org/I65143321"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5026324656"],"corresponding_institution_ids":["https://openalex.org/I65143321"],"apc_list":null,"apc_paid":null,"fwci":3.4256,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.94046959,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"98","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.92570561170578},{"id":"https://openalex.org/keywords/subsequence","display_name":"Subsequence","score":0.8653810024261475},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7190689444541931},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6640734672546387},{"id":"https://openalex.org/keywords/attractor","display_name":"Attractor","score":0.5562842488288879},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.510415256023407},{"id":"https://openalex.org/keywords/longest-common-subsequence-problem","display_name":"Longest common subsequence problem","score":0.4906393587589264},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4678429365158081},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4480941891670227},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3164905905723572},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2598797678947449},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21782457828521729}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.92570561170578},{"id":"https://openalex.org/C137877099","wikidata":"https://www.wikidata.org/wiki/Q1332977","display_name":"Subsequence","level":3,"score":0.8653810024261475},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7190689444541931},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6640734672546387},{"id":"https://openalex.org/C164380108","wikidata":"https://www.wikidata.org/wiki/Q507187","display_name":"Attractor","level":2,"score":0.5562842488288879},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.510415256023407},{"id":"https://openalex.org/C120098539","wikidata":"https://www.wikidata.org/wiki/Q141001","display_name":"Longest common subsequence problem","level":2,"score":0.4906393587589264},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4678429365158081},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4480941891670227},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3164905905723572},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2598797678947449},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21782457828521729},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9687875","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687875","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W2134089414","https://openalex.org/W2163823641","https://openalex.org/W2221409856","https://openalex.org/W2460742184","https://openalex.org/W2558649592","https://openalex.org/W2605161420","https://openalex.org/W2891405874","https://openalex.org/W2896538040","https://openalex.org/W2952218014","https://openalex.org/W2962935966","https://openalex.org/W2963167763","https://openalex.org/W2963470929","https://openalex.org/W2972449503","https://openalex.org/W2972767900","https://openalex.org/W2973127116","https://openalex.org/W2981087920","https://openalex.org/W2997419692","https://openalex.org/W3008283340","https://openalex.org/W3008357631","https://openalex.org/W3015544392","https://openalex.org/W3015780472","https://openalex.org/W3016232124","https://openalex.org/W3016244460","https://openalex.org/W3020336359","https://openalex.org/W3024085360","https://openalex.org/W3024104148","https://openalex.org/W3025260599","https://openalex.org/W3033627755","https://openalex.org/W3035268204","https://openalex.org/W3095212884","https://openalex.org/W3096090308","https://openalex.org/W3099330747","https://openalex.org/W3105031100","https://openalex.org/W3128688470","https://openalex.org/W3142516134","https://openalex.org/W3144086690","https://openalex.org/W3145204487","https://openalex.org/W3160044950","https://openalex.org/W3163019736","https://openalex.org/W3163903701","https://openalex.org/W3174648740","https://openalex.org/W3178462146","https://openalex.org/W3185109982","https://openalex.org/W3196595845","https://openalex.org/W3196857193","https://openalex.org/W3197916665","https://openalex.org/W3212886388","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6679849079","https://openalex.org/W6736147098","https://openalex.org/W6739901393","https://openalex.org/W6774995033","https://openalex.org/W6779069803","https://openalex.org/W6790122275","https://openalex.org/W6790297703","https://openalex.org/W6797229353"],"related_works":["https://openalex.org/W25732909","https://openalex.org/W4323338832","https://openalex.org/W4289596129","https://openalex.org/W170643605","https://openalex.org/W1999879627","https://openalex.org/W4295189757","https://openalex.org/W2088055539","https://openalex.org/W1574834681","https://openalex.org/W2389167168","https://openalex.org/W1746697883"],"abstract_inverted_index":{"Attractor-based":[0],"end-to-end":[1,65,184],"diarization":[2,82,88,115,148,185],"is":[3,21,34,43,93,137,179],"achieving":[4],"comparable":[5],"accuracy":[6],"to":[7,113],"the":[8,18,27,30,37,63,99,103,107,124,129,167,182],"carefully":[9],"tuned":[10],"conventional":[11,183],"clustering-based":[12],"methods":[13],"on":[14,49,166],"challenging":[15],"datasets.":[16],"However,":[17],"main":[19],"drawback":[20],"that":[22,142],"it":[23,111],"cannot":[24],"deal":[25],"with":[26],"case":[28],"where":[29],"number":[31,120,130,153],"of":[32,72,98,117,121,131,150,154,177],"speakers":[33,122,133],"larger":[35],"than":[36,181],"one":[38],"observed":[39],"during":[40],"training.":[41],"This":[42,109],"because":[44],"its":[45],"speaker":[46,91],"counting":[47],"relies":[48],"supervised":[50],"learning.":[51],"In":[52],"this":[53],"work,":[54],"we":[55],"introduce":[56],"an":[57,151],"unsupervised":[58,96],"clustering":[59,97],"process":[60],"embedded":[61],"in":[62],"attractor-based":[64,81],"diarization.":[66],"We":[67],"first":[68],"split":[69],"a":[70,118],"sequence":[71],"frame-wise":[73],"embeddings":[74],"into":[75],"short":[76],"subsequences":[77],"and":[78,163,171],"then":[79],"perform":[80],"for":[83,123,134],"each":[84,135,176],"subsequence.":[85],"Given":[86],"subsequence-wise":[87],"results,":[89],"inter-subsequence":[90],"correspondence":[92],"obtained":[94],"by":[95],"vectors":[100],"computed":[101],"from":[102,105],"attractors":[104],"all":[106],"subsequences.":[108],"makes":[110],"possible":[112],"produce":[114,146],"results":[116,140,149],"large":[119],"whole":[125],"recording":[126],"even":[127],"if":[128],"output":[132],"subsequence":[136],"limited.":[138],"Experimental":[139],"showed":[141],"our":[143],"method":[144,157],"could":[145],"accurate":[147],"unseen":[152],"speakers.":[155],"Our":[156],"achieved":[158],"11.84":[159],"%,":[160,162],"28.33":[161],"19.49":[164],"%":[165],"CALLHOME,":[168],"DI-HARD":[169],"II,":[170],"DIHARD":[172],"III":[173],"datasets,":[174],"respectively,":[175],"which":[178],"better":[180],"methods.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":8}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
