{"id":"https://openalex.org/W3095212884","doi":"https://doi.org/10.21437/interspeech.2020-1022","title":"End-to-End Speaker Diarization for an Unknown Number of Speakers with Encoder-Decoder Based Attractors","display_name":"End-to-End Speaker Diarization for an Unknown Number of Speakers with Encoder-Decoder Based Attractors","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3095212884","doi":"https://doi.org/10.21437/interspeech.2020-1022","mag":"3095212884"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-1022","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1022","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026324656","display_name":"Shota Horiguchi","orcid":"https://orcid.org/0000-0002-3166-4956"},"institutions":[{"id":"https://openalex.org/I4210089357","display_name":"Hitachi (United Kingdom)","ror":"https://ror.org/0097wyf31","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210089357","https://openalex.org/I65143321"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shota Horiguchi","raw_affiliation_strings":["Hitachi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hitachi","institution_ids":["https://openalex.org/I4210089357"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044818016","display_name":"Yusuke Fujita","orcid":"https://orcid.org/0000-0002-6523-8146"},"institutions":[{"id":"https://openalex.org/I4210089357","display_name":"Hitachi (United Kingdom)","ror":"https://ror.org/0097wyf31","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210089357","https://openalex.org/I65143321"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yusuke Fujita","raw_affiliation_strings":["Hitachi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hitachi","institution_ids":["https://openalex.org/I4210089357"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Johns Hopkins University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101012633","display_name":"Yawen Xue","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089357","display_name":"Hitachi (United Kingdom)","ror":"https://ror.org/0097wyf31","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210089357","https://openalex.org/I65143321"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yawen Xue","raw_affiliation_strings":["Hitachi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hitachi","institution_ids":["https://openalex.org/I4210089357"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076987349","display_name":"Kenji Nagamatsu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089357","display_name":"Hitachi (United Kingdom)","ror":"https://ror.org/0097wyf31","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210089357","https://openalex.org/I65143321"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kenji Nagamatsu","raw_affiliation_strings":["Hitachi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hitachi","institution_ids":["https://openalex.org/I4210089357"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":14.3537,"has_fulltext":false,"cited_by_count":166,"citation_normalized_percentile":{"value":0.99152929,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"269","last_page":"273"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6798264384269714},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6568053364753723},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.6407725811004639},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.49973225593566895},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.460595965385437},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2835713326931},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.2583417296409607}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6798264384269714},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6568053364753723},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.6407725811004639},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.49973225593566895},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.460595965385437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2835713326931},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.2583417296409607},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2020-1022","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1022","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1574017590","https://openalex.org/W1965819578","https://openalex.org/W2038101708","https://openalex.org/W2115599677","https://openalex.org/W2130942839","https://openalex.org/W2148613904","https://openalex.org/W2159094433","https://openalex.org/W2219249508","https://openalex.org/W2236062296","https://openalex.org/W2288817436","https://openalex.org/W2460742184","https://openalex.org/W2558649592","https://openalex.org/W2696967604","https://openalex.org/W2735663686","https://openalex.org/W2870589900","https://openalex.org/W2889418727","https://openalex.org/W2890964092","https://openalex.org/W2891054259","https://openalex.org/W2896538040","https://openalex.org/W2938358845","https://openalex.org/W2952752702","https://openalex.org/W2953273646","https://openalex.org/W2963470929","https://openalex.org/W2972449503","https://openalex.org/W2972680151","https://openalex.org/W2972712416","https://openalex.org/W2972756562","https://openalex.org/W2972767900","https://openalex.org/W2972949456","https://openalex.org/W2981087920","https://openalex.org/W2981608174","https://openalex.org/W3004597053","https://openalex.org/W3008357631","https://openalex.org/W3010196324","https://openalex.org/W3015544392","https://openalex.org/W3016031604","https://openalex.org/W3034729383","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4308164622","https://openalex.org/W4297415750","https://openalex.org/W2952010730","https://openalex.org/W3209371554","https://openalex.org/W3094723376","https://openalex.org/W3206479921","https://openalex.org/W3015923889","https://openalex.org/W4319663602","https://openalex.org/W3097290232","https://openalex.org/W3025798172"],"abstract_inverted_index":{"End-to-end":[0],"speaker":[1,16,21,79,92],"diarization":[2,17,93,105],"for":[3,43],"an":[4],"unknown":[5,133],"number":[6,36,54,77],"of":[7,34,37,55,78,121,135],"speakers":[8,136],"is":[9,29,84],"addressed":[10],"in":[11,32],"this":[12],"paper.Recently":[13],"proposed":[14],"end-toend":[15],"outperformed":[18],"conventional":[19,88],"clusteringbased":[20],"diarization,":[22],"but":[23],"it":[24,28],"has":[25],"one":[26],"drawback:":[27],"less":[30],"flexible":[31,53],"terms":[33],"the":[35,62,69,75,87,118,148],"speakers.This":[38],"paper":[39],"proposes":[40],"a":[41,52,58,96,102,113,141,153],"method":[42,100,139,151],"encoder-decoder":[44],"based":[45],"attractor":[46],"calculation":[47],"(EDA),":[48],"which":[49],"first":[50],"generates":[51],"attractors":[56,65],"from":[57],"speech":[59,70,81],"embedding":[60,71,82],"sequence.Then,":[61],"generated":[63],"multiple":[64],"are":[66],"multiplied":[67],"by":[68],"sequence":[72,83],"to":[73],"produce":[74],"same":[76],"activities.The":[80],"extracted":[85],"using":[86],"self-attentive":[89],"end-to-end":[90],"neural":[91],"(SA-EEND)":[94],"network.In":[95],"two-speaker":[97,119],"condition,":[98],"our":[99,138],"achieved":[101,152],"2.69":[103],"%":[104,115,128,143,155],"error":[106],"rate":[107],"(DER)":[108],"on":[109,117,145],"simulated":[110],"mixtures":[111],"and":[112,129],"8.07":[114],"DER":[116,144],"subset":[120],"CALLHOME,":[122,146],"while":[123,147],"vanilla":[124],"SA-EEND":[125],"attained":[126,140],"4.56":[127],"9.54":[130],"%,":[131],"respectively.In":[132],"numbers":[134],"conditions,":[137],"15.29":[142],"x-vectorbased":[149],"clustering":[150],"19.43":[154],"DER.":[156]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":28},{"year":2024,"cited_by_count":26},{"year":2023,"cited_by_count":29},{"year":2022,"cited_by_count":37},{"year":2021,"cited_by_count":35},{"year":2020,"cited_by_count":5}],"updated_date":"2026-06-16T09:24:06.705377","created_date":"2025-10-10T00:00:00"}
