{"id":"https://openalex.org/W2135636388","doi":"https://doi.org/10.1109/tasl.2010.2040796","title":"Tuning-Robust Initialization Methods for Speaker Diarization","display_name":"Tuning-Robust Initialization Methods for Speaker Diarization","publication_year":2010,"publication_date":"2010-01-22","ids":{"openalex":"https://openalex.org/W2135636388","doi":"https://doi.org/10.1109/tasl.2010.2040796","mag":"2135636388"},"language":"en","primary_location":{"id":"doi:10.1109/tasl.2010.2040796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2010.2040796","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://infoscience.epfl.ch/record/153578","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037750159","display_name":"David Imseng","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]},{"id":"https://openalex.org/I7495430","display_name":"Idiap Research Institute","ror":"https://ror.org/05932h694","country_code":"CH","type":"facility","lineage":["https://openalex.org/I7495430"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"David Imseng","raw_affiliation_strings":["Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, Lausanne, Switzerland","Idiap Research Institute, Martigny, Switzerland","IDIAP Res. Inst., Martigny, Switzerland"],"affiliations":[{"raw_affiliation_string":"Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]},{"raw_affiliation_string":"Idiap Research Institute, Martigny, Switzerland","institution_ids":["https://openalex.org/I7495430"]},{"raw_affiliation_string":"IDIAP Res. Inst., Martigny, Switzerland","institution_ids":["https://openalex.org/I7495430"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062323483","display_name":"Gerald Friedland","orcid":"https://orcid.org/0000-0002-9400-6539"},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gerald Friedland","raw_affiliation_strings":["International Computer Science Institute, Berkeley, CA, USA","Int. Comput. Sci. Inst., Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"International Computer Science Institute, Berkeley, CA, USA","institution_ids":["https://openalex.org/I1297971548"]},{"raw_affiliation_string":"Int. Comput. Sci. Inst., Berkeley, CA, USA","institution_ids":["https://openalex.org/I1297971548"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5037750159"],"corresponding_institution_ids":["https://openalex.org/I5124864","https://openalex.org/I7495430"],"apc_list":null,"apc_paid":null,"fwci":6.7657,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.96749891,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"18","issue":"8","first_page":"2028","last_page":"2037"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.9183341264724731},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.849764347076416},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7430239915847778},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.7382824420928955},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6074203252792358},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5772086381912231},{"id":"https://openalex.org/keywords/bayesian-information-criterion","display_name":"Bayesian information criterion","score":0.536171555519104},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46638038754463196},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.4518043100833893},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.4407901167869568},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.43318891525268555},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.42356395721435547},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4211779236793518},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3767438232898712},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.31738537549972534}],"concepts":[{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.9183341264724731},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.849764347076416},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7430239915847778},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.7382824420928955},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6074203252792358},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5772086381912231},{"id":"https://openalex.org/C168136583","wikidata":"https://www.wikidata.org/wiki/Q1988242","display_name":"Bayesian information criterion","level":2,"score":0.536171555519104},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46638038754463196},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.4518043100833893},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.4407901167869568},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.43318891525268555},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.42356395721435547},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4211779236793518},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3767438232898712},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.31738537549972534},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tasl.2010.2040796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2010.2040796","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:infoscience.epfl.ch:153578","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/153578","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:153578","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/153578","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W202282565","https://openalex.org/W1482605500","https://openalex.org/W1524832141","https://openalex.org/W1554663460","https://openalex.org/W1556474518","https://openalex.org/W1570448133","https://openalex.org/W1574017590","https://openalex.org/W1584960981","https://openalex.org/W2036365100","https://openalex.org/W2082968463","https://openalex.org/W2106780157","https://openalex.org/W2110783210","https://openalex.org/W2126432406","https://openalex.org/W2134584072","https://openalex.org/W2136032552","https://openalex.org/W2144125830","https://openalex.org/W2148613904","https://openalex.org/W2156255174","https://openalex.org/W2164512637","https://openalex.org/W2169165592","https://openalex.org/W2177686784","https://openalex.org/W2187637362","https://openalex.org/W2216290105","https://openalex.org/W2283928653","https://openalex.org/W2338994564","https://openalex.org/W2509822191","https://openalex.org/W2571118946","https://openalex.org/W2966207845","https://openalex.org/W3216401400","https://openalex.org/W4285719527","https://openalex.org/W4388297464","https://openalex.org/W6628911050","https://openalex.org/W6634515183","https://openalex.org/W6634978355","https://openalex.org/W6659344013","https://openalex.org/W6678924068","https://openalex.org/W6680024980","https://openalex.org/W6684648996","https://openalex.org/W6688727824","https://openalex.org/W6725232968","https://openalex.org/W7005290334"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2119836628","https://openalex.org/W2356325859","https://openalex.org/W106647055","https://openalex.org/W2162158162","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W2175373321","https://openalex.org/W2157299333","https://openalex.org/W2125642021"],"abstract_inverted_index":{"This":[0,210],"paper":[1],"investigates":[2],"a":[3,17,79,148,205,214],"typical":[4,51],"speaker":[5,155],"diarization":[6,136,156],"system":[7,34,215],"regarding":[8],"its":[9],"robustness":[10],"against":[11],"initialization":[12,40,63,87,150,208],"parameter":[13,151],"variation":[14],"and":[15,65,89,118,124,166,194,201,237],"presents":[16],"method":[18,153,178],"to":[19,37,57,141],"reduce":[20],"manual":[21,95],"tuning":[22,96],"of":[23,29,50,71,85,106,122,134,147,171,186,189,197,227,232],"these":[24],"values":[25,59,88],"significantly.":[26],"The":[27,144,176],"behavior":[28],"an":[30],"agglomerative":[31,159],"hierarchical":[32],"clustering":[33,160],"is":[35,53,139,202],"studied":[36],"determine":[38],"which":[39],"parameters":[41,64],"impact":[42],"accuracy":[43,49,105],"most.":[44],"We":[45,76],"show":[46],"that":[47,81,157,216],"the":[48,58,62,69,74,83,86,92,100,104,107,114,132,135,180,183,187,195,198,220,228,233],"systems":[52],"indeed":[54],"very":[55],"sensitive":[56],"chosen":[60],"for":[61,94,154],"factors":[66],"such":[67],"as":[68],"duration":[70,196],"speech":[72,190,199],"in":[73,213],"recording.":[75],"then":[77],"present":[78],"solution":[80],"reduces":[82,91],"sensitivity":[84],"therefore":[90],"need":[93],"significantly":[97],"while":[98],"at":[99],"same":[101],"time":[102],"increasing":[103],"system.":[108],"For":[109],"short":[110],"meetings":[111],"extracted":[112],"from":[113],"previous":[115],"(2006,":[116],"2007,":[117,236],"2009)":[119],"National":[120],"Institute":[121],"Standards":[123],"Technology":[125],"(NIST)":[126],"Rich":[127],"Transcription":[128],"(RT)":[129],"evaluation":[130],"data,":[131],"decrease":[133],"error":[137],"rate":[138],"up":[140],"50%":[142],"relative.":[143],"approach":[145,211],"consists":[146],"novel":[149,206],"estimation":[152,177],"uses":[158],"with":[161,204],"Bayesian":[162],"information":[163],"criterion":[164],"(BIC)":[165],"Gaussian":[167,193],"mixture":[168],"models":[169],"(GMMs)":[170],"frame-based":[172],"cepstral":[173],"features":[174],"(MFCCs).":[175],"balances":[179],"relationship":[181],"between":[182],"optimal":[184],"value":[185],"seconds":[188],"data":[191,200],"per":[192],"combined":[203],"nonuniform":[207],"method.":[209],"results":[212],"performs":[217],"better":[218],"than":[219],"current":[221],"ICSI":[222],"baseline":[223],"engine":[224],"on":[225],"datasets":[226],"NIST":[229],"RT":[230],"evaluations":[231],"years":[234],"2006,":[235],"2009.":[238]},"counts_by_year":[{"year":2017,"cited_by_count":7},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":11}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
