{"id":"https://openalex.org/W2038162002","doi":"https://doi.org/10.1109/tasl.2011.2159710","title":"A Comparative Study of Bottom-Up and Top-Down Approaches to Speaker Diarization","display_name":"A Comparative Study of Bottom-Up and Top-Down Approaches to Speaker Diarization","publication_year":2012,"publication_date":"2012-01-31","ids":{"openalex":"https://openalex.org/W2038162002","doi":"https://doi.org/10.1109/tasl.2011.2159710","mag":"2038162002"},"language":"en","primary_location":{"id":"doi:10.1109/tasl.2011.2159710","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2011.2159710","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066811192","display_name":"Nicholas Evans","orcid":"https://orcid.org/0000-0002-8459-1041"},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Nicholas Evans","raw_affiliation_strings":["Department of Multimedia Communications, EURECOM, Sophia-Antipolis, France","Dept. of Multimedia Commun., EURECOM, Sophia Antipolis, France"],"affiliations":[{"raw_affiliation_string":"Department of Multimedia Communications, EURECOM, Sophia-Antipolis, France","institution_ids":["https://openalex.org/I1902872"]},{"raw_affiliation_string":"Dept. of Multimedia Commun., EURECOM, Sophia Antipolis, France","institution_ids":["https://openalex.org/I1902872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010332658","display_name":"Simon Bozonnet","orcid":null},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Simon Bozonnet","raw_affiliation_strings":["Department of Multimedia Communications, EURECOM, Sophia-Antipolis, France","Dept. of Multimedia Commun., EURECOM, Sophia Antipolis, France"],"affiliations":[{"raw_affiliation_string":"Department of Multimedia Communications, EURECOM, Sophia-Antipolis, France","institution_ids":["https://openalex.org/I1902872"]},{"raw_affiliation_string":"Dept. of Multimedia Commun., EURECOM, Sophia Antipolis, France","institution_ids":["https://openalex.org/I1902872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100391494","display_name":"Dong Wang","orcid":"https://orcid.org/0000-0002-6992-7950"},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Dong Wang","raw_affiliation_strings":["Department of Multimedia Communications, EURECOM, Sophia-Antipolis, France","Dept. of Multimedia Commun., EURECOM, Sophia Antipolis, France"],"affiliations":[{"raw_affiliation_string":"Department of Multimedia Communications, EURECOM, Sophia-Antipolis, France","institution_ids":["https://openalex.org/I1902872"]},{"raw_affiliation_string":"Dept. of Multimedia Commun., EURECOM, Sophia Antipolis, France","institution_ids":["https://openalex.org/I1902872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018458807","display_name":"Corinne Fredouille","orcid":"https://orcid.org/0000-0002-0413-8950"},"institutions":[{"id":"https://openalex.org/I198415970","display_name":"Universit\u00e9 d'Avignon et des Pays de Vaucluse","ror":"https://ror.org/00mfpxb84","country_code":"FR","type":"education","lineage":["https://openalex.org/I198415970"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Corinne Fredouille","raw_affiliation_strings":["CERI/LIA, University of Avignon, Avignon, France"],"affiliations":[{"raw_affiliation_string":"CERI/LIA, University of Avignon, Avignon, France","institution_ids":["https://openalex.org/I198415970"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022574838","display_name":"Rapha\u00ebl Troncy","orcid":"https://orcid.org/0000-0003-0457-1436"},"institutions":[{"id":"https://openalex.org/I1902872","display_name":"EURECOM","ror":"https://ror.org/00sse7z02","country_code":"FR","type":"education","lineage":["https://openalex.org/I1902872","https://openalex.org/I205703379"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Rapha\u00ebl Troncy","raw_affiliation_strings":["Department of Multimedia Communications, EURECOM, Sophia-Antipolis, France","Dept. of Multimedia Commun., EURECOM, Sophia Antipolis, France"],"affiliations":[{"raw_affiliation_string":"Department of Multimedia Communications, EURECOM, Sophia-Antipolis, France","institution_ids":["https://openalex.org/I1902872"]},{"raw_affiliation_string":"Dept. of Multimedia Commun., EURECOM, Sophia Antipolis, France","institution_ids":["https://openalex.org/I1902872"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5066811192"],"corresponding_institution_ids":["https://openalex.org/I1902872"],"apc_list":null,"apc_paid":null,"fwci":7.7064,"has_fulltext":false,"cited_by_count":43,"citation_normalized_percentile":{"value":0.97234257,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"20","issue":"2","first_page":"382","last_page":"392"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8463759422302246},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.8104465007781982},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7571349143981934},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.5936971306800842},{"id":"https://openalex.org/keywords/top-down-and-bottom-up-design","display_name":"Top-down and bottom-up design","score":0.5443775653839111},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5401448607444763},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5121588706970215},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.491342693567276},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4805322289466858},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.41530752182006836},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.41465625166893005},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4081360101699829},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33034229278564453},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3190799355506897},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0680827796459198}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8463759422302246},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.8104465007781982},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7571349143981934},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.5936971306800842},{"id":"https://openalex.org/C135798126","wikidata":"https://www.wikidata.org/wiki/Q2167279","display_name":"Top-down and bottom-up design","level":2,"score":0.5443775653839111},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5401448607444763},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5121588706970215},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.491342693567276},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4805322289466858},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.41530752182006836},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.41465625166893005},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4081360101699829},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33034229278564453},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3190799355506897},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0680827796459198},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tasl.2011.2159710","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2011.2159710","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.699999988079071,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W41021157","https://openalex.org/W90546637","https://openalex.org/W100802191","https://openalex.org/W119794901","https://openalex.org/W184672293","https://openalex.org/W1482605500","https://openalex.org/W1484181928","https://openalex.org/W1524832141","https://openalex.org/W1569447338","https://openalex.org/W1574017590","https://openalex.org/W1584960981","https://openalex.org/W2001936727","https://openalex.org/W2052269122","https://openalex.org/W2060656758","https://openalex.org/W2081074144","https://openalex.org/W2097470427","https://openalex.org/W2103375217","https://openalex.org/W2106015547","https://openalex.org/W2115130489","https://openalex.org/W2132990350","https://openalex.org/W2134584072","https://openalex.org/W2135636388","https://openalex.org/W2136032552","https://openalex.org/W2136752740","https://openalex.org/W2138730338","https://openalex.org/W2147520277","https://openalex.org/W2147833929","https://openalex.org/W2148613904","https://openalex.org/W2155995145","https://openalex.org/W2159591770","https://openalex.org/W2164512637","https://openalex.org/W2169264834","https://openalex.org/W2216290105","https://openalex.org/W2338994564","https://openalex.org/W2398758317","https://openalex.org/W4299586809","https://openalex.org/W4300666183","https://openalex.org/W6601719842","https://openalex.org/W6604874165","https://openalex.org/W6628802092","https://openalex.org/W6628911050","https://openalex.org/W6633847657","https://openalex.org/W6634515183","https://openalex.org/W6634978355","https://openalex.org/W6635810183","https://openalex.org/W6659344013","https://openalex.org/W6679595104","https://openalex.org/W6680024980","https://openalex.org/W6681846537","https://openalex.org/W6688727824"],"related_works":["https://openalex.org/W2118039276","https://openalex.org/W2206035908","https://openalex.org/W2162158162","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2938358845"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,186],"theoretical":[4],"framework":[5],"to":[6,18,41,65,72,143],"analyze":[7],"the":[8,12,36,73,152,157,169,177,192,195,207,211],"relative":[9],"merits":[10,193],"of":[11,129,162,194,203,210],"two":[13,37,102,196,212],"most":[14],"general,":[15],"dominant":[16],"approaches":[17,38,53,126],"speaker":[19,46,84,145],"diarization":[20],"involving":[21],"bottom-up":[22,52,123,158,178],"and":[23,49,59,124,131,133,164,175,205],"top-down":[24,76,125,153,170],"hierarchical":[25],"clustering.":[26],"We":[27,97],"present":[28],"an":[29,200],"original":[30],"qualitative":[31],"comparison":[32],"which":[33,89,109,141],"argues":[34],"how":[35],"are":[39,90],"likely":[40],"exhibit":[42],"different":[43],"behavior":[44],"in":[45,78],"inventory":[47],"optimization":[48],"model":[50],"training:":[51],"will":[54,60,80],"capture":[55],"comparatively":[56],"purer":[57],"models":[58,85,88],"thus":[61],"be":[62,119],"more":[63,148],"sensitive":[64],"nuisance":[66,95],"variation":[67],"such":[68],"as":[69],"that":[70,115,134,168,176,190],"related":[71],"speech":[74],"content;":[75],"approaches,":[77],"contrast,":[79],"produce":[81],"less":[82,173,181],"discriminative":[83,174],"but,":[86],"importantly,":[87],"potentially":[91],"better":[92],"normalized":[93],"against":[94],"variation.":[96],"report":[98,185],"experiments":[99],"conducted":[100],"on":[101],"standard,":[103],"single-channel":[104],"NIST":[105],"RT":[106],"evaluation":[107],"datasets":[108],"validate":[110],"our":[111],"hypotheses.":[112],"Results":[113],"show":[114],"competitive":[116],"performance":[117],"can":[118],"achieved":[120],"with":[121,151,156],"both":[122],"(average":[127,160],"DERs":[128,161],"21%":[130],"22%),":[132],"neither":[135],"approach":[136],"is":[137,172,180],"superior.":[138],"Speaker":[139],"purification,":[140],"aims":[142],"improve":[144],"discrimination,":[146],"gives":[147],"consistent":[149],"improvements":[150],"system":[154,159,171,179],"than":[155],"19%":[163],"25%),":[165],"thereby":[166],"confirming":[167],"stable.":[182],"Finally,":[183],"we":[184],"new":[187],"combination":[188],"strategy":[189],"exploits":[191],"approaches.":[197,213],"Combination":[198],"delivers":[199],"average":[201],"DER":[202],"17%":[204],"confirms":[206],"intrinsic":[208],"complementary":[209]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
