{"id":"https://openalex.org/W7127051914","doi":"https://doi.org/10.1111/exsy.70221","title":"On the Limitations of Speaker Diarization","display_name":"On the Limitations of Speaker Diarization","publication_year":2026,"publication_date":"2026-02-01","ids":{"openalex":"https://openalex.org/W7127051914","doi":"https://doi.org/10.1111/exsy.70221"},"language":"en","primary_location":{"id":"doi:10.1111/exsy.70221","is_oa":true,"landing_page_url":"https://doi.org/10.1111/exsy.70221","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1111/exsy.70221","source":{"id":"https://openalex.org/S72232612","display_name":"Expert Systems","issn_l":"0266-4720","issn":["0266-4720","1468-0394"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Expert Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1111/exsy.70221","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124742152","display_name":"Joana Amorim","orcid":null},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]},{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Joana Amorim","raw_affiliation_strings":["Faculty of Computer Science Dalhousie University  Halifax Nova Scotia Canada","Vector Institute for Artificial Intelligence  Toronto Ontario Canada"],"raw_orcid":"https://orcid.org/0000-0003-1359-2095","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science Dalhousie University  Halifax Nova Scotia Canada","institution_ids":["https://openalex.org/I129902397"]},{"raw_affiliation_string":"Vector Institute for Artificial Intelligence  Toronto Ontario Canada","institution_ids":["https://openalex.org/I4210127509"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124733678","display_name":"Jo\u00e3o Pimentel","orcid":null},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]},{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jo\u00e3o Pimentel","raw_affiliation_strings":["Faculty of Computer Science Dalhousie University  Halifax Nova Scotia Canada","Vector Institute for Artificial Intelligence  Toronto Ontario Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science Dalhousie University  Halifax Nova Scotia Canada","institution_ids":["https://openalex.org/I129902397"]},{"raw_affiliation_string":"Vector Institute for Artificial Intelligence  Toronto Ontario Canada","institution_ids":["https://openalex.org/I4210127509"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124696918","display_name":"Frank Rudzicz","orcid":null},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]},{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Frank Rudzicz","raw_affiliation_strings":["Faculty of Computer Science Dalhousie University  Halifax Nova Scotia Canada","Vector Institute for Artificial Intelligence  Toronto Ontario Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science Dalhousie University  Halifax Nova Scotia Canada","institution_ids":["https://openalex.org/I129902397"]},{"raw_affiliation_string":"Vector Institute for Artificial Intelligence  Toronto Ontario Canada","institution_ids":["https://openalex.org/I4210127509"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5124742152"],"corresponding_institution_ids":["https://openalex.org/I129902397","https://openalex.org/I4210127509"],"apc_list":{"value":3860,"currency":"USD","value_usd":3860},"apc_paid":{"value":3860,"currency":"USD","value_usd":3860},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20866899,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"43","issue":"3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7566999793052673,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7566999793052673,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.07859999686479568,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.04050000011920929,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.8794999718666077},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6675999760627747},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.6244000196456909},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.517300009727478},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.47920000553131104},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.42250001430511475}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.8794999718666077},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8773000240325928},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6807000041007996},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6675999760627747},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.6244000196456909},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.517300009727478},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.47920000553131104},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.42250001430511475},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.41510000824928284},{"id":"https://openalex.org/C2986627078","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker identification","level":3,"score":0.3878999948501587},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37130001187324524},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.34880000352859497},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1111/exsy.70221","is_oa":true,"landing_page_url":"https://doi.org/10.1111/exsy.70221","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1111/exsy.70221","source":{"id":"https://openalex.org/S72232612","display_name":"Expert Systems","issn_l":"0266-4720","issn":["0266-4720","1468-0394"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Expert Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1111/exsy.70221","is_oa":true,"landing_page_url":"https://doi.org/10.1111/exsy.70221","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1111/exsy.70221","source":{"id":"https://openalex.org/S72232612","display_name":"Expert Systems","issn_l":"0266-4720","issn":["0266-4720","1468-0394"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Expert Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5809192657470703,"display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309949","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95"},{"id":"https://openalex.org/F4320323180","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W7127051914.pdf"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W3765491","https://openalex.org/W2125336414","https://openalex.org/W2141262306","https://openalex.org/W2163922914","https://openalex.org/W2192412620","https://openalex.org/W2902864383","https://openalex.org/W2973127116","https://openalex.org/W2997419692","https://openalex.org/W3024869864","https://openalex.org/W3038871978","https://openalex.org/W3095822285","https://openalex.org/W3160747466","https://openalex.org/W3196117288","https://openalex.org/W3196595845","https://openalex.org/W3197674197","https://openalex.org/W3203417382","https://openalex.org/W3204618061","https://openalex.org/W3205878676","https://openalex.org/W3212886388","https://openalex.org/W4225890157","https://openalex.org/W4237168004","https://openalex.org/W4292470341","https://openalex.org/W4296069155","https://openalex.org/W4296069335","https://openalex.org/W4297841362","https://openalex.org/W4297841547","https://openalex.org/W4378675609","https://openalex.org/W4385822293","https://openalex.org/W4385822353","https://openalex.org/W4385822384","https://openalex.org/W4387865047","https://openalex.org/W4392636793"],"related_works":[],"abstract_inverted_index":{"ABSTRACT":[0],"Although":[1],"speaker":[2,85,95,121],"diarization":[3,37,49,56],"has":[4],"evolved":[5],"to":[6,39,150,160],"be":[7],"more":[8,11],"robust":[9],"and":[10,52,55,79,106,117,134,163,174],"refined,":[12],"including":[13],"incorporating":[14],"modern":[15],"automatic":[16],"speech":[17,147,171,175],"recognition":[18],"(ASR),":[19],"current":[20,36],"systems":[21,38],"still":[22],"suffer":[23],"from":[24],"several":[25],"disruptive":[26],"factors,":[27],"like":[28],"noise.":[29],"We":[30,67,128,144],"comprehensively":[31],"evaluate":[32],"the":[33,41,69,90,94,120,140,170],"limitations":[34],"of":[35,62,71,76,92],"uncover":[40,152],"underlying":[42,153],"causes":[43],"that":[44,104,118,155],"hinder":[45,84],"accuracy.":[46],"Five":[47],"open\u2010source":[48],"pipelines\u2014both":[50],"diarization\u2010only":[51,77,105],"joint":[53,72,107],"ASR":[54],"systems\u2014are":[57],"assessed":[58],"on":[59],"a":[60],"set":[61],"heterogeneous":[63],"benchmark":[64],"data":[65],"sets.":[66],"compare":[68],"performance":[70,126],"pipelines":[73],"against":[74],"those":[75],"systems,":[78],"analyse":[80],"which":[81],"audio":[82,132],"characteristics":[83],"discrimination,":[86],"as":[87,89,97,139],"well":[88],"impact":[91],"using":[93,146],"count":[96,122],"an":[98],"input":[99],"parameter.":[100],"Our":[101],"results":[102],"indicate":[103],"approaches":[108],"are":[109],"competitive":[110],"with":[111],"each":[112],"other":[113],"in":[114],"unsupervised":[115],"scenarios,":[116],"providing":[119],"does":[123],"not":[124],"improve":[125],"consistently.":[127],"also":[129],"identify":[130],"short":[131],"duration":[133],"low":[135],"speech\u2010to\u2010noise":[136],"ratio":[137],"(SNR)":[138],"most":[141],"impairing":[142],"properties.":[143],"recommend":[145],"representation":[148],"learning":[149],"further":[151],"factors":[154],"affect":[156],"diarization,":[157],"pre\u2010processing":[158],"techniques":[159],"remove":[161],"noise,":[162],"performing":[164],"hyper\u2010parameter":[165],"tuning":[166],"on,":[167],"for":[168],"example,":[169],"window":[172],"length,":[173],"detection":[176],"thresholds.":[177]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2026-02-03T00:00:00"}
