{"id":"https://openalex.org/W1600395290","doi":"https://doi.org/10.21437/interspeech.2010-702","title":"An integrated top-down/bottom-up approach to speaker diarization","display_name":"An integrated top-down/bottom-up approach to speaker diarization","publication_year":2010,"publication_date":"2010-09-26","ids":{"openalex":"https://openalex.org/W1600395290","doi":"https://doi.org/10.21437/interspeech.2010-702","mag":"1600395290"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2010-702","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2010-702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2010","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010332658","display_name":"Simon Bozonnet","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Simon Bozonnet","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066811192","display_name":"Nicholas Evans","orcid":"https://orcid.org/0000-0002-8459-1041"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nicholas Evans","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018458807","display_name":"Corinne Fredouille","orcid":"https://orcid.org/0000-0002-0413-8950"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Corinne Fredouille","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Dong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5022574838","display_name":"Rapha\u00ebl Troncy","orcid":"https://orcid.org/0000-0003-0457-1436"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rapha\u00ebl Troncy","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5010332658"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8042,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.86310909,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2646","last_page":"2649"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9704999923706055,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9704999923706055,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9028000235557556,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.7707521915435791},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6421427726745605},{"id":"https://openalex.org/keywords/top-down-and-bottom-up-design","display_name":"Top-down and bottom-up design","score":0.634860634803772},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4440467655658722},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3581165075302124},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.055701255798339844}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.7707521915435791},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6421427726745605},{"id":"https://openalex.org/C135798126","wikidata":"https://www.wikidata.org/wiki/Q2167279","display_name":"Top-down and bottom-up design","level":2,"score":0.634860634803772},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4440467655658722},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3581165075302124},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.055701255798339844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2010-702","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2010-702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2010","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","display_name":"No poverty","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W119794901","https://openalex.org/W184672293","https://openalex.org/W1815845248","https://openalex.org/W2001936727","https://openalex.org/W2052269122","https://openalex.org/W2097470427","https://openalex.org/W2114981053","https://openalex.org/W2115130489","https://openalex.org/W2138730338","https://openalex.org/W2155995145","https://openalex.org/W2338994564"],"related_works":["https://openalex.org/W1521049138","https://openalex.org/W2144208207","https://openalex.org/W1497807607","https://openalex.org/W1813780412","https://openalex.org/W1509309911","https://openalex.org/W1940231550","https://openalex.org/W2148232863","https://openalex.org/W2160753975","https://openalex.org/W2118860825","https://openalex.org/W2161543916"],"abstract_inverted_index":{"Most":[0],"speaker":[1,61,104,137,139,141],"diarization":[2,62,105],"systems":[3,14,32,87],"fit":[4],"into":[5],"one":[6],"of":[7,69,93,130],"two":[8,86],"categories:":[9],"bottom-up":[10],"or":[11],"top-down.":[12],"Bottom-up":[13],"are":[15,37,88],"the":[16,67,85,91,94,121],"most":[17],"popular":[18],"but":[19,36],"can":[20],"sometimes":[21],"suffer":[22],"from":[23,25],"instability":[24],"merging":[26],"and":[27,72,77,96,110,117],"stopping":[28],"criteria":[29],"difficulties.":[30],"Top-down":[31],"deliver":[33],"competitive":[34],"results":[35,100],"particularly":[38],"prone":[39],"to":[40,47,60,65,74,81],"poor":[41],"model":[42],"initialization":[43],"which":[44,63],"often":[45],"leads":[46],"large":[48],"variations":[49],"in":[50,103,124],"performance.":[51],"This":[52],"paper":[53],"presents":[54],"a":[55],"new":[56],"integrated":[57],"bottom-up/topdown":[58],"approach":[59],"aims":[64],"harness":[66],"strengths":[68],"each":[70],"system":[71,143],"thus":[73],"improve":[75],"performance":[76,106],"stability.":[78,119],"In":[79],"contrast":[80],"previous":[82],"work,":[83],"here":[84],"fused":[89],"at":[90],"heart":[92],"segmentation":[95],"clustering":[97],"stage.":[98],"Experimental":[99],"show":[101],"improvements":[102],"for":[107],"both":[108],"meeting":[109],"TV-show":[111],"domain":[112],"data":[113,123],"indicating":[114],"increased":[115],"intra":[116],"inter-domain":[118],"On":[120],"TVshow":[122],"particular,":[125],"an":[126],"average":[127],"relative":[128],"improvement":[129],"32%":[131],"DER":[132],"is":[133],"obtained.":[134],"Index":[135],"Terms:":[136],"diarization,":[138],"segmentation,":[140],"clustering,":[142],"combination,":[144],"SDM":[145]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
