{"id":"https://openalex.org/W2101025008","doi":"https://doi.org/10.1109/icassp.2009.4960524","title":"Fusing short term and long term features for improved speaker diarization","display_name":"Fusing short term and long term features for improved speaker diarization","publication_year":2009,"publication_date":"2009-04-01","ids":{"openalex":"https://openalex.org/W2101025008","doi":"https://doi.org/10.1109/icassp.2009.4960524","mag":"2101025008"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2009.4960524","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2009.4960524","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 IEEE International Conference on Acoustics, Speech and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026941022","display_name":"Alyssa Friedland","orcid":null},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"A. Gerald Friedland","raw_affiliation_strings":["International Computer Science Institute, Berkeley, CA, USA","Intern-l Computer Science Institute, 1947 Center Street Suite 600, Berkeley, CA, 94704, USA"],"affiliations":[{"raw_affiliation_string":"International Computer Science Institute, Berkeley, CA, USA","institution_ids":["https://openalex.org/I1297971548"]},{"raw_affiliation_string":"Intern-l Computer Science Institute, 1947 Center Street Suite 600, Berkeley, CA, 94704, USA","institution_ids":["https://openalex.org/I1297971548"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087470675","display_name":"B. Oriol Vinyals","orcid":null},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"B. Oriol Vinyals","raw_affiliation_strings":["International Computer Science Institute, Berkeley, CA, USA","Intern-l Computer Science Institute, 1947 Center Street Suite 600, Berkeley, CA, 94704, USA"],"affiliations":[{"raw_affiliation_string":"International Computer Science Institute, Berkeley, CA, USA","institution_ids":["https://openalex.org/I1297971548"]},{"raw_affiliation_string":"Intern-l Computer Science Institute, 1947 Center Street Suite 600, Berkeley, CA, 94704, USA","institution_ids":["https://openalex.org/I1297971548"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103086620","display_name":"Chien-Lin Huang","orcid":"https://orcid.org/0000-0003-3157-4173"},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Yan Huang","raw_affiliation_strings":["International Computer Science Institute, Berkeley, CA, USA","Intern-l Computer Science Institute, 1947 Center Street Suite 600, Berkeley, CA, 94704, USA"],"affiliations":[{"raw_affiliation_string":"International Computer Science Institute, Berkeley, CA, USA","institution_ids":["https://openalex.org/I1297971548"]},{"raw_affiliation_string":"Intern-l Computer Science Institute, 1947 Center Street Suite 600, Berkeley, CA, 94704, USA","institution_ids":["https://openalex.org/I1297971548"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047192599","display_name":"Dennis M\u00fcller","orcid":"https://orcid.org/0000-0002-4482-4912"},"institutions":[{"id":"https://openalex.org/I33256026","display_name":"German Research Centre for Artificial Intelligence","ror":"https://ror.org/01ayc5b57","country_code":"DE","type":"funder","lineage":["https://openalex.org/I33256026"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"D. Christian Muller","raw_affiliation_strings":["German Research Center for Artificial Intelligence, Saarbruecken, Germany","German Research Center for AI, Stuhlsatzenhausweg 3, 66123 Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"German Research Center for Artificial Intelligence, Saarbruecken, Germany","institution_ids":["https://openalex.org/I33256026"]},{"raw_affiliation_string":"German Research Center for AI, Stuhlsatzenhausweg 3, 66123 Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I33256026"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5026941022"],"corresponding_institution_ids":["https://openalex.org/I1297971548"],"apc_list":null,"apc_paid":null,"fwci":2.6171,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.90758185,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"4077","last_page":"4080"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.9554340839385986},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.9114880561828613},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.8352330327033997},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7512078285217285},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6933867931365967},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5243228673934937},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4542534649372101},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42424216866493225},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3288753926753998}],"concepts":[{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.9554340839385986},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.9114880561828613},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.8352330327033997},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7512078285217285},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6933867931365967},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5243228673934937},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4542534649372101},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42424216866493225},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3288753926753998},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2009.4960524","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2009.4960524","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 IEEE International Conference on Acoustics, Speech and Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6600000262260437}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W75083802","https://openalex.org/W1538905335","https://openalex.org/W2042860351","https://openalex.org/W2113386290","https://openalex.org/W2144125830","https://openalex.org/W2338994564","https://openalex.org/W3127686677","https://openalex.org/W4244494905","https://openalex.org/W6632339187","https://openalex.org/W6659344013","https://openalex.org/W6917638038","https://openalex.org/W6967213981"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W4297807400","https://openalex.org/W1491159402","https://openalex.org/W2144208207","https://openalex.org/W4389984014","https://openalex.org/W1509309911","https://openalex.org/W1940231550","https://openalex.org/W1599425004","https://openalex.org/W2118860825","https://openalex.org/W2096510939"],"abstract_inverted_index":{"The":[0,61],"following":[1],"article":[2],"shows":[3],"how":[4,43],"a":[5,28,73,102,127],"state-of-the-art":[6],"speaker":[7,33,59],"diarization":[8,81],"system":[9,88],"can":[10,48],"be":[11,49],"improved":[12],"by":[13],"combining":[14],"traditional":[15],"short-term":[16,52],"features":[17,47,53,123,137],"(MFCCs)":[18],"with":[19,51],"prosodic":[20,120],"and":[21,71,121],"other":[22,142],"long-term":[23,38,46,122],"features.":[24,39],"First,":[25],"we":[26,41,108,131],"present":[27],"framework":[29],"to":[30,54,85,140],"study":[31],"the":[32,44,56,86,91,119,135],"discriminability":[34],"of":[35,58,76,105],"70":[36],"different":[37],"Then,":[40],"show":[42,72],"top-ranked":[45],"combined":[50],"increase":[55],"accuracy":[57],"diarization.":[60],"results":[62],"were":[63,124],"measured":[64],"on":[65,101],"standardized":[66],"data":[67],"sets":[68],"(NIST":[69],"RT)":[70],"consistent":[74],"improvement":[75],"about":[77],"30%":[78],"relative":[79],"in":[80,94],"error":[82],"rate":[83],"compared":[84],"best":[87],"presented":[89],"at":[90],"NIST":[92],"evaluation":[93],"2007.":[95],"This":[96],"result":[97],"was":[98],"also":[99],"verified":[100],"wide":[103],"set":[104],"meetings,":[106],"which":[107],"call":[109],"CombDev,":[110],"that":[111,134,144],"contains":[112],"21":[113],"meetings":[114],"from":[115],"previous":[116],"evaluations.":[117],"Since":[118],"selected":[125],"using":[126],"diarization-independent":[128],"speaker-discriminability":[129],"study,":[130],"are":[132,138],"confident":[133],"same":[136],"able":[139],"improve":[141],"systems":[143],"perform":[145],"similar":[146],"tasks":[147]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
