{"id":"https://openalex.org/W4372349651","doi":"https://doi.org/10.1109/icassp49357.2023.10094824","title":"Neural Diarization with Non-Autoregressive Intermediate Attractors","display_name":"Neural Diarization with Non-Autoregressive Intermediate Attractors","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372349651","doi":"https://doi.org/10.1109/icassp49357.2023.10094824"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10094824","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094824","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044818016","display_name":"Yusuke Fujita","orcid":"https://orcid.org/0000-0002-6523-8146"},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yusuke Fujita","raw_affiliation_strings":["LINE Corporation,Japan","LINE Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"LINE Corporation,Japan","institution_ids":["https://openalex.org/I4210096607"]},{"raw_affiliation_string":"LINE Corporation, Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113730169","display_name":"Tatsuya Komatsu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tatsuya Komatsu","raw_affiliation_strings":["LINE Corporation,Japan","LINE Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"LINE Corporation,Japan","institution_ids":["https://openalex.org/I4210096607"]},{"raw_affiliation_string":"LINE Corporation, Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020401831","display_name":"Robin Scheibler","orcid":"https://orcid.org/0000-0002-5205-8365"},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Robin Scheibler","raw_affiliation_strings":["LINE Corporation,Japan","LINE Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"LINE Corporation,Japan","institution_ids":["https://openalex.org/I4210096607"]},{"raw_affiliation_string":"LINE Corporation, Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109488356","display_name":"Yusuke Kida","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yusuke Kida","raw_affiliation_strings":["LINE Corporation,Japan","LINE Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"LINE Corporation,Japan","institution_ids":["https://openalex.org/I4210096607"]},{"raw_affiliation_string":"LINE Corporation, Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087632404","display_name":"Tetsuji Ogawa","orcid":"https://orcid.org/0000-0002-7316-2073"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tetsuji Ogawa","raw_affiliation_strings":["Waseda University,Japan","Waseda University, Japan"],"affiliations":[{"raw_affiliation_string":"Waseda University,Japan","institution_ids":["https://openalex.org/I150744194"]},{"raw_affiliation_string":"Waseda University, Japan","institution_ids":["https://openalex.org/I150744194"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5044818016"],"corresponding_institution_ids":["https://openalex.org/I4210096607"],"apc_list":null,"apc_paid":null,"fwci":1.7457,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.87281336,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.8437615036964417},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7874636054039001},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.7232606410980225},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6627702713012695},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.6343075037002563},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5462403893470764},{"id":"https://openalex.org/keywords/nonlinear-autoregressive-exogenous-model","display_name":"Nonlinear autoregressive exogenous model","score":0.5451076626777649},{"id":"https://openalex.org/keywords/attractor","display_name":"Attractor","score":0.4976988136768341},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.46065565943717957},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4561939835548401},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38970643281936646},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.29312556982040405},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1103975772857666},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.05810302495956421}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.8437615036964417},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7874636054039001},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.7232606410980225},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6627702713012695},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.6343075037002563},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5462403893470764},{"id":"https://openalex.org/C42536954","wikidata":"https://www.wikidata.org/wiki/Q7049462","display_name":"Nonlinear autoregressive exogenous model","level":3,"score":0.5451076626777649},{"id":"https://openalex.org/C164380108","wikidata":"https://www.wikidata.org/wiki/Q507187","display_name":"Attractor","level":2,"score":0.4976988136768341},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.46065565943717957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4561939835548401},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38970643281936646},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.29312556982040405},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1103975772857666},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.05810302495956421},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10094824","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094824","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W123007118","https://openalex.org/W1485783873","https://openalex.org/W1591607137","https://openalex.org/W1965819578","https://openalex.org/W2038101708","https://openalex.org/W2148613904","https://openalex.org/W2159591770","https://openalex.org/W2170579896","https://openalex.org/W2219249508","https://openalex.org/W2221409856","https://openalex.org/W2638067502","https://openalex.org/W2889418727","https://openalex.org/W2890964092","https://openalex.org/W2962788625","https://openalex.org/W2963470929","https://openalex.org/W2972449503","https://openalex.org/W2972949456","https://openalex.org/W3008357631","https://openalex.org/W3038871978","https://openalex.org/W3095212884","https://openalex.org/W3160044950","https://openalex.org/W3162249256","https://openalex.org/W3196595845","https://openalex.org/W3197140813","https://openalex.org/W3197916665","https://openalex.org/W3206479921","https://openalex.org/W3206573929","https://openalex.org/W3212886388","https://openalex.org/W4220731890","https://openalex.org/W4297841362","https://openalex.org/W4385245566","https://openalex.org/W6681922338","https://openalex.org/W6688816777","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2606910468","https://openalex.org/W3116827148","https://openalex.org/W3120843198","https://openalex.org/W2154965898","https://openalex.org/W2036704594","https://openalex.org/W4226315710","https://openalex.org/W3083782034","https://openalex.org/W4287185323","https://openalex.org/W2995801509","https://openalex.org/W2275178414"],"abstract_inverted_index":{"End-to-end":[0],"neural":[1,23],"diarization":[2,17,123],"(EEND)":[3],"with":[4,20,77,105,115,128],"encoder-decoder-based":[5],"attractors":[6,63,120],"(EDA)":[7],"is":[8],"a":[9,21,46,86],"promising":[10],"method":[11,59,127],"to":[12,64,96],"handle":[13],"the":[14,26,52,69,74,81,89,97,106,112,116,122,129,135],"whole":[15,98],"speaker":[16,33,66,90],"problem":[18],"simultaneously":[19],"single":[22],"network.":[24],"While":[25,80],"EEND":[27,48],"model":[28,49,83],"can":[29],"produce":[30,65],"all":[31],"frame-level":[32],"labels":[34,67,91,114],"simultaneously,":[35],"it":[36],"disregards":[37],"output":[38],"label":[39,53],"dependency.":[40],"In":[41],"this":[42],"work,":[43],"we":[44],"propose":[45],"novel":[47],"that":[50,111],"introduces":[51],"dependency":[54],"between":[55],"frames.":[56],"The":[57,103,125],"proposed":[58,82,117,126],"generates":[60],"non-autoregressive":[61,87,118],"intermediate":[62,101,113,119,136],"at":[68],"lower":[70],"layers":[71,76],"and":[72,142],"conditions":[73],"subsequent":[75],"these":[78],"labels.":[79,102],"works":[84],"in":[85,139],"manner,":[88],"are":[92],"refined":[93],"by":[94],"referring":[95],"sequence":[99],"of":[100],"experiments":[104],"two-speaker":[107],"CALLHOME":[108],"dataset":[109],"show":[110],"boost":[121],"performance.":[124],"deeper":[130],"net-work":[131],"benefits":[132],"more":[133],"from":[134],"labels,":[137],"resulting":[138],"better":[140],"performance":[141],"training":[143],"throughput":[144],"than":[145],"EEND-EDA.":[146]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
