{"id":"https://openalex.org/W2973064719","doi":"https://doi.org/10.21437/interspeech.2019-2716","title":"LEAP Diarization System for the Second DIHARD Challenge","display_name":"LEAP Diarization System for the Second DIHARD Challenge","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2973064719","doi":"https://doi.org/10.21437/interspeech.2019-2716","mag":"2973064719"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-2716","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.qut.edu.au/132059/1/DIHARD_2019_challenge.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101528020","display_name":"Prachi Singh","orcid":"https://orcid.org/0000-0003-0760-8572"},"institutions":[{"id":"https://openalex.org/I132153292","display_name":"Indian Institute of Technology Patna","ror":"https://ror.org/01ft5vz71","country_code":"IN","type":"education","lineage":["https://openalex.org/I132153292"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Prachi Singh","raw_affiliation_strings":["Learning and Extraction of Acoustic Patterns (LEAP) Lab, Indian Institute of Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Learning and Extraction of Acoustic Patterns (LEAP) Lab, Indian Institute of Science","institution_ids":["https://openalex.org/I132153292"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112222277","display_name":"Harsha Vardhan","orcid":null},"institutions":[{"id":"https://openalex.org/I132153292","display_name":"Indian Institute of Technology Patna","ror":"https://ror.org/01ft5vz71","country_code":"IN","type":"education","lineage":["https://openalex.org/I132153292"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Harsha Vardhan M.A.","raw_affiliation_strings":["Learning and Extraction of Acoustic Patterns (LEAP) Lab, Indian Institute of Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Learning and Extraction of Acoustic Patterns (LEAP) Lab, Indian Institute of Science","institution_ids":["https://openalex.org/I132153292"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002536077","display_name":"Sriram Ganapathy","orcid":"https://orcid.org/0000-0002-5779-9066"},"institutions":[{"id":"https://openalex.org/I132153292","display_name":"Indian Institute of Technology Patna","ror":"https://ror.org/01ft5vz71","country_code":"IN","type":"education","lineage":["https://openalex.org/I132153292"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sriram Ganapathy","raw_affiliation_strings":["Learning and Extraction of Acoustic Patterns (LEAP) Lab, Indian Institute of Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Learning and Extraction of Acoustic Patterns (LEAP) Lab, Indian Institute of Science","institution_ids":["https://openalex.org/I132153292"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023434638","display_name":"Ahilan Kanagasundaram","orcid":"https://orcid.org/0000-0002-0533-7986"},"institutions":[{"id":"https://openalex.org/I198412587","display_name":"University of Jaffna","ror":"https://ror.org/02fwjgw17","country_code":"LK","type":"education","lineage":["https://openalex.org/I198412587"]}],"countries":["LK"],"is_corresponding":false,"raw_author_name":"A. Kanagasundaram","raw_affiliation_strings":["University of Jaffna, Sri Lanka"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Jaffna, Sri Lanka","institution_ids":["https://openalex.org/I198412587"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2687,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.85277794,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"983","last_page":"987"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.8531703948974609},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7446420192718506},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.6800958514213562},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6301262974739075},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5622819066047668},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.549449622631073},{"id":"https://openalex.org/keywords/linear-discriminant-analysis","display_name":"Linear discriminant analysis","score":0.5321523547172546},{"id":"https://openalex.org/keywords/multidimensional-scaling","display_name":"Multidimensional scaling","score":0.5134245157241821},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.47953271865844727},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.47483545541763306},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.474136084318161},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4738239049911499},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4654933214187622},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.45395368337631226},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.41674819588661194},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.38911348581314087},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3150264024734497},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10635888576507568}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.8531703948974609},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7446420192718506},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.6800958514213562},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6301262974739075},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5622819066047668},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.549449622631073},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.5321523547172546},{"id":"https://openalex.org/C91682802","wikidata":"https://www.wikidata.org/wiki/Q620538","display_name":"Multidimensional scaling","level":2,"score":0.5134245157241821},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.47953271865844727},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.47483545541763306},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.474136084318161},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4738239049911499},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4654933214187622},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.45395368337631226},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.41674819588661194},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.38911348581314087},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3150264024734497},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10635888576507568},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2019-2716","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.qut.edu.au:132059","is_oa":true,"landing_page_url":"https://interspeech2019.org/","pdf_url":"https://eprints.qut.edu.au/132059/1/DIHARD_2019_challenge.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 20th Annual Conference of the International Speech Communication Association (INTERSPEECH 2019): Crossroads of Speech and Language","raw_type":"Chapter in Book, Report or Conference volume"}],"best_oa_location":{"id":"pmh:oai:eprints.qut.edu.au:132059","is_oa":true,"landing_page_url":"https://interspeech2019.org/","pdf_url":"https://eprints.qut.edu.au/132059/1/DIHARD_2019_challenge.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 20th Annual Conference of the International Speech Communication Association (INTERSPEECH 2019): Crossroads of Speech and Language","raw_type":"Chapter in Book, Report or Conference volume"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6200000047683716}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2973064719.pdf","grobid_xml":"https://content.openalex.org/works/W2973064719.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W118310762","https://openalex.org/W1509663858","https://openalex.org/W1524333225","https://openalex.org/W1589339348","https://openalex.org/W2121812409","https://openalex.org/W2150769028","https://openalex.org/W2405137844","https://openalex.org/W2612266954","https://openalex.org/W2638067502","https://openalex.org/W2726515241","https://openalex.org/W2746471976","https://openalex.org/W2805869973","https://openalex.org/W2806563680","https://openalex.org/W2807629722","https://openalex.org/W2808631503","https://openalex.org/W2889418727","https://openalex.org/W2889419636","https://openalex.org/W2889496963","https://openalex.org/W2890964092","https://openalex.org/W2891237889","https://openalex.org/W2972449503","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W1483890997","https://openalex.org/W2162158162","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2938358845"],"abstract_inverted_index":{"&lt;p&gt;This":[0],"paper":[1],"presents":[2],"the":[3,8,17,56,75,85,106,119,124,132,146,151,160,171,184,188],"LEAP":[4,41],"System,":[5],"developed":[6,44],"for":[7,145,159],"Second":[9],"DIHARD":[10,152],"diarization":[11,65,108],"Challenge.":[12],"The":[13,40,63],"evaluation":[14,129,165],"data":[15],"in":[16,24,32,105,139,180,187],"challenge":[18],"is":[19,43,82],"composed":[20],"of":[21,48,141],"multi-talker":[22],"speech":[23],"restaurants,":[25],"doctor-patient":[26],"conversations,":[27],"child":[28],"language":[29],"acquisition":[30],"recordings":[31],"home":[33],"environments":[34],"and":[35,55,128,155],"audio":[36],"extracted":[37],"YouTube":[38],"videos.":[39],"system":[42,149,154,162],"using":[45,68,84,170],"two":[46],"types":[47],"embeddings,":[49],"one":[50,58],"based":[51,59],"on":[52,60,74,118,164],"i-vector":[53,120],"representations":[54],"other":[57],"x-vector":[61],"representations.":[62],"initial":[64],"output":[66],"obtained":[67],"agglomerative":[69],"hierarchical":[70],"clustering":[71],"(AHC)":[72],"done":[73],"probabilistic":[76],"linear":[77],"discriminant":[78],"analysis":[79,168],"(PLDA)":[80],"scores":[81],"refined":[83],"Variational-Bayes":[86],"hidden":[87],"Markov":[88],"model":[89,97],"(VB-HMM)":[90],"model.":[91],"We":[92,112],"propose":[93],"a":[94,115],"modified":[95],"VB-HMM":[96],"with":[98],"posterior":[99,173],"scaling":[100,174,178],"which":[101],"provides":[102],"significant":[103],"improvements":[104,138],"final":[107,161],"error":[109],"rate":[110],"(DER).":[111],"also":[113],"use":[114],"domain":[116],"compensation":[117],"features":[121],"to":[122],"reduce":[123],"mis-match":[125],"between":[126],"training":[127],"conditions.N(s)TN(s)TN(s)T":[130],"Using":[131],"proposed":[133,172],"approaches,":[134],"we":[135],"obtain":[136],"relative":[137,144,158],"DER":[140],"about":[142,156],"7.1%":[143],"best":[147],"individual":[148],"over":[150],"baseline":[153],"13.7%":[157],"combination":[163],"set.":[166],"An":[167],"performed":[169],"method":[175],"shows":[176],"that":[177],"results":[179],"improved":[181],"discrimination":[182],"among":[183],"HMM":[185],"states":[186],"VB-HMM.&lt;/p&gt;":[189]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3}],"updated_date":"2026-07-02T09:51:11.867554","created_date":"2025-10-10T00:00:00"}
