{"id":"https://openalex.org/W2949706715","doi":"https://doi.org/10.1109/ncc.2019.8732210","title":"Comparison of low-dimension speech segment embeddings: Application to speaker diarization","display_name":"Comparison of low-dimension speech segment embeddings: Application to speaker diarization","publication_year":2019,"publication_date":"2019-02-01","ids":{"openalex":"https://openalex.org/W2949706715","doi":"https://doi.org/10.1109/ncc.2019.8732210","mag":"2949706715"},"language":"en","primary_location":{"id":"doi:10.1109/ncc.2019.8732210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ncc.2019.8732210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 National Conference on Communications (NCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004929413","display_name":"Srikanth Raj Chetupalli","orcid":"https://orcid.org/0000-0002-2186-5420"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Srikanth Raj Chetupalli","raw_affiliation_strings":["Dept. of ECE, Indian Institute of Science, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Dept. of ECE, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069425803","display_name":"T.V. Sreenivas","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Thippur V. Sreenivas","raw_affiliation_strings":["Dept. of ECE, Indian Institute of Science, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Dept. of ECE, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111075264","display_name":"Anand Gopalakrishnan","orcid":null},"institutions":[{"id":"https://openalex.org/I42014448","display_name":"Sardar Vallabhbhai National Institute of Technology Surat","ror":"https://ror.org/02y394t43","country_code":"IN","type":"education","lineage":["https://openalex.org/I42014448"]},{"id":"https://openalex.org/I11880225","display_name":"National Institute of Technology Karnataka","ror":"https://ror.org/01hz4v948","country_code":"IN","type":"education","lineage":["https://openalex.org/I11880225"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Anand Gopalakrishnan","raw_affiliation_strings":["Dept. of EEE, National Institute of Technology, Surathkal, Karnataka, India"],"affiliations":[{"raw_affiliation_string":"Dept. of EEE, National Institute of Technology, Surathkal, Karnataka, India","institution_ids":["https://openalex.org/I11880225","https://openalex.org/I42014448"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5004929413"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05380509,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"17","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.8032550811767578},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.760235071182251},{"id":"https://openalex.org/keywords/multidimensional-scaling","display_name":"Multidimensional scaling","score":0.6786832809448242},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.65789395570755},{"id":"https://openalex.org/keywords/principal-component-analysis","display_name":"Principal component analysis","score":0.6551882028579712},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6239890456199646},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.6184179186820984},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.5902921557426453},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.569758415222168},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5132615566253662},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5020103454589844},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.48348504304885864},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4783479869365692},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.27933889627456665},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.12752923369407654}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.8032550811767578},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.760235071182251},{"id":"https://openalex.org/C91682802","wikidata":"https://www.wikidata.org/wiki/Q620538","display_name":"Multidimensional scaling","level":2,"score":0.6786832809448242},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.65789395570755},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.6551882028579712},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6239890456199646},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.6184179186820984},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.5902921557426453},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.569758415222168},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5132615566253662},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5020103454589844},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48348504304885864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4783479869365692},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27933889627456665},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.12752923369407654},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ncc.2019.8732210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ncc.2019.8732210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 National Conference on Communications (NCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W48568691","https://openalex.org/W115846232","https://openalex.org/W125260059","https://openalex.org/W1663973292","https://openalex.org/W1993436046","https://openalex.org/W2033178790","https://openalex.org/W2038101708","https://openalex.org/W2041823554","https://openalex.org/W2060470839","https://openalex.org/W2074394031","https://openalex.org/W2081074144","https://openalex.org/W2115599677","https://openalex.org/W2150769028","https://openalex.org/W2159591770","https://openalex.org/W2404874347","https://openalex.org/W4212863985","https://openalex.org/W6601947329","https://openalex.org/W6604730075","https://openalex.org/W6605093602","https://openalex.org/W6713414053"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W1483890997","https://openalex.org/W2149220986","https://openalex.org/W1197719229","https://openalex.org/W4247736853","https://openalex.org/W2381158726","https://openalex.org/W2552734076","https://openalex.org/W2162158162","https://openalex.org/W106647055","https://openalex.org/W1992796048"],"abstract_inverted_index":{"Segment":[0],"clustering":[1,16],"is":[2,45],"a":[3,36],"crucial":[4],"step":[5],"in":[6,34,100,106],"unsupervised":[7],"speaker":[8,37],"diarization.":[9],"Bottom-up":[10],"approaches,":[11],"such":[12],"as,":[13],"hierarchical":[14],"agglomerative":[15],"technique":[17],"are":[18],"used":[19],"traditionally":[20],"for":[21],"segment":[22,65,73,75],"clustering.":[23,55],"In":[24],"this":[25],"paper,":[26],"we":[27],"consider":[28],"the":[29,62],"top-down":[30],"approach":[31],"to":[32,74,111],"clustering,":[33],"which":[35],"sensitive,":[38],"low-dimensional":[39],"representation":[40,102],"of":[41,60,89],"segments":[42],"(speaker":[43],"space)":[44],"obtained":[46],"first,":[47],"followed":[48],"by":[49],"Gaussian":[50],"mixture":[51],"model":[52],"(GMM)":[53],"based":[54,71,97],"We":[56,93],"explore":[57],"three":[58],"methods":[59],"obtaining":[61],"low":[63],"dimension":[64],"representation:":[66],"(i)":[67],"multi-dimensional":[68],"scaling":[69],"(MDS)":[70],"on":[72],"stochastic":[76],"distances;":[77],"(ii)":[78],"traditional":[79],"principal":[80],"component":[81],"analysis":[82,87],"(PCA),":[83],"and":[84,103,113],"(iii)":[85],"factor":[86],"(i-vectors),":[88],"GMM":[90],"mean":[91],"super-vectors.":[92],"found":[94],"that,":[95],"MDS":[96],"embeddings":[98],"result":[99,105],"better":[101,107],"hence":[104],"diarization":[108],"performance":[109],"compared":[110],"PCA":[112],"even":[114],"i-vector":[115],"embeddings.":[116]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
