{"id":"https://openalex.org/W2997785206","doi":"https://doi.org/10.1109/waspaa.2019.8937185","title":"Multiple Hypothesis Tracking for Overlapping Speaker Segmentation","display_name":"Multiple Hypothesis Tracking for Overlapping Speaker Segmentation","publication_year":2019,"publication_date":"2019-10-01","ids":{"openalex":"https://openalex.org/W2997785206","doi":"https://doi.org/10.1109/waspaa.2019.8937185","mag":"2997785206"},"language":"en","primary_location":{"id":"doi:10.1109/waspaa.2019.8937185","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa.2019.8937185","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031002666","display_name":"Aidan O. T. Hogg","orcid":"https://orcid.org/0000-0001-5501-7799"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Aidan O. T. Hogg","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Imperial College London, UK"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Imperial College London, UK","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061813792","display_name":"Christine Evers","orcid":"https://orcid.org/0000-0003-0757-5504"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Christine Evers","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Imperial College London, UK"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Imperial College London, UK","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016227729","display_name":"Patrick A. Naylor","orcid":"https://orcid.org/0000-0001-8546-8013"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Patrick A. Naylor","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Imperial College London, UK"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Imperial College London, UK","institution_ids":["https://openalex.org/I47508984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031002666"],"corresponding_institution_ids":["https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":0.8293,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74444581,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"195","last_page":"199"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.8440357446670532},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8159646987915039},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7570387721061707},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7227602005004883},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5670803785324097},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5312020182609558},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.49570953845977783},{"id":"https://openalex.org/keywords/speech-segmentation","display_name":"Speech segmentation","score":0.44709062576293945},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43458878993988037},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.43004798889160156},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4265749156475067},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4145914316177368}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.8440357446670532},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8159646987915039},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7570387721061707},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7227602005004883},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5670803785324097},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5312020182609558},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.49570953845977783},{"id":"https://openalex.org/C207030507","wikidata":"https://www.wikidata.org/wiki/Q2266173","display_name":"Speech segmentation","level":3,"score":0.44709062576293945},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43458878993988037},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.43004798889160156},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4265749156475067},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4145914316177368},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/waspaa.2019.8937185","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa.2019.8937185","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.soton.ac.uk:439390","is_oa":false,"landing_page_url":"https://eprints.soton.ac.uk/439390/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401019","display_name":"ePrints Soton (University of Southampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I43439940","host_organization_name":"University of Southampton","host_organization_lineage":["https://openalex.org/I43439940"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:spiral.imperial.ac.uk:10044/1/72344","is_oa":false,"landing_page_url":"http://hdl.handle.net/10044/1/72344","pdf_url":null,"source":{"id":"https://openalex.org/S4306401396","display_name":"Spiral (Imperial College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I47508984","host_organization_name":"Imperial College London","host_organization_lineage":["https://openalex.org/I47508984"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.49000000953674316,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G7870544646","display_name":null,"funder_award_id":"EP/P001017/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W142991516","https://openalex.org/W177037875","https://openalex.org/W1558276682","https://openalex.org/W1568122762","https://openalex.org/W1589339348","https://openalex.org/W1916910924","https://openalex.org/W1990449877","https://openalex.org/W2044525818","https://openalex.org/W2074394031","https://openalex.org/W2101702585","https://openalex.org/W2105934661","https://openalex.org/W2119599673","https://openalex.org/W2122201922","https://openalex.org/W2125336414","https://openalex.org/W2127923214","https://openalex.org/W2169264834","https://openalex.org/W2237765446","https://openalex.org/W2407024733","https://openalex.org/W2602334317","https://openalex.org/W2735405579","https://openalex.org/W2746241180","https://openalex.org/W2770691132","https://openalex.org/W2791203079","https://openalex.org/W2807015669","https://openalex.org/W2937501454","https://openalex.org/W3127686677","https://openalex.org/W6605762983","https://openalex.org/W6633191483","https://openalex.org/W6635144179","https://openalex.org/W6684823487","https://openalex.org/W6735862337","https://openalex.org/W6743401195","https://openalex.org/W6789826613"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2149220986","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2125642021","https://openalex.org/W2023466863","https://openalex.org/W2144208207","https://openalex.org/W2099333848"],"abstract_inverted_index":{"Speaker":[0],"segmentation":[1,78],"is":[2,74,89,103],"an":[3],"essential":[4],"part":[5],"of":[6,11,65],"any":[7],"diarization":[8,12],"system.":[9],"Applications":[10],"include":[13],"tasks":[14],"such":[15],"as":[16],"speaker":[17],"indexing,":[18],"improving":[19],"automatic":[20],"speech":[21,56,66],"recognition":[22],"(ASR)":[23],"performance":[24,108],"and":[25,63,88],"making":[26],"single":[27],"speaker-based":[28],"algorithms":[29],"available":[30],"for":[31,109],"use":[32],"in":[33,54,57,119],"multi-speaker":[34],"environments.":[35],"This":[36],"paper":[37],"proposes":[38],"a":[39,77,85],"multiple":[40],"hypothesis":[41],"tracking":[42],"(MHT)":[43],"method":[44,73,102],"that":[45,83],"exploits":[46],"the":[47,52,61,81,115,120],"harmonic":[48,117],"structure":[49],"associated":[50],"with":[51],"pitch":[53,116],"voiced":[55],"order":[58],"to":[59,105],"segment":[60],"onsets":[62],"end-points":[64],"from":[67,80],"multiple,":[68],"overlapping":[69,111],"speakers.":[70],"The":[71,100],"proposed":[72,101],"evaluated":[75],"against":[76],"system":[79],"literature":[82],"uses":[84],"spectral":[86],"representation":[87],"based":[90],"on":[91],"employing":[92],"bidirectional":[93],"long":[94],"short":[95],"term":[96],"memory":[97],"networks":[98],"(BLSTM).":[99],"shown":[104],"achieve":[106],"comparable":[107],"segmenting":[110],"speakers":[112],"only":[113],"using":[114],"information":[118],"MHT":[121],"framework.":[122]},"counts_by_year":[{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
