{"id":"https://openalex.org/W2937501454","doi":"https://doi.org/10.1109/icassp.2019.8682924","title":"Speaker Change Detection Using Fundamental Frequency with Application to Multi-talker Segmentation","display_name":"Speaker Change Detection Using Fundamental Frequency with Application to Multi-talker Segmentation","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2937501454","doi":"https://doi.org/10.1109/icassp.2019.8682924","mag":"2937501454"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8682924","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682924","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031002666","display_name":"Aidan O. T. Hogg","orcid":"https://orcid.org/0000-0001-5501-7799"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Aidan O. T. Hogg","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Imperial College, London, UK"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Imperial College, London, UK","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061813792","display_name":"Christine Evers","orcid":"https://orcid.org/0000-0003-0757-5504"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Christine Evers","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Imperial College, London, UK"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Imperial College, London, UK","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016227729","display_name":"Patrick A. Naylor","orcid":"https://orcid.org/0000-0001-8546-8013"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Patrick A. Naylor","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Imperial College, London, UK"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Imperial College, London, UK","institution_ids":["https://openalex.org/I47508984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031002666"],"corresponding_institution_ids":["https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":1.1201,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.83363078,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5826","last_page":"5830"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7897804975509644},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7068192958831787},{"id":"https://openalex.org/keywords/change-detection","display_name":"Change detection","score":0.6941141486167908},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.6858046054840088},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6606875658035278},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.6211634874343872},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5746854543685913},{"id":"https://openalex.org/keywords/kalman-filter","display_name":"Kalman filter","score":0.5461689829826355},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.44491520524024963},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3741911053657532},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35788023471832275},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.2586662173271179},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08628803491592407}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7897804975509644},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7068192958831787},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.6941141486167908},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.6858046054840088},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6606875658035278},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.6211634874343872},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5746854543685913},{"id":"https://openalex.org/C157286648","wikidata":"https://www.wikidata.org/wiki/Q846780","display_name":"Kalman filter","level":2,"score":0.5461689829826355},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.44491520524024963},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3741911053657532},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35788023471832275},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2586662173271179},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08628803491592407},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icassp.2019.8682924","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682924","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.soton.ac.uk:439803","is_oa":false,"landing_page_url":"https://eprints.soton.ac.uk/439803/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401019","display_name":"ePrints Soton (University of Southampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I43439940","host_organization_name":"University of Southampton","host_organization_lineage":["https://openalex.org/I43439940"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:spiral.imperial.ac.uk:10044/1/68311","is_oa":false,"landing_page_url":"http://hdl.handle.net/10044/1/68311","pdf_url":null,"source":{"id":"https://openalex.org/S4306401396","display_name":"Spiral (Imperial College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I47508984","host_organization_name":"Imperial College London","host_organization_lineage":["https://openalex.org/I47508984"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5199999809265137,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G7870544646","display_name":null,"funder_award_id":"EP/P001017/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W142991516","https://openalex.org/W1485877426","https://openalex.org/W1589339348","https://openalex.org/W1968491588","https://openalex.org/W1990449877","https://openalex.org/W2009674825","https://openalex.org/W2010040043","https://openalex.org/W2044525818","https://openalex.org/W2044591814","https://openalex.org/W2052269122","https://openalex.org/W2074394031","https://openalex.org/W2081074144","https://openalex.org/W2095772307","https://openalex.org/W2102601039","https://openalex.org/W2105934661","https://openalex.org/W2115426281","https://openalex.org/W2125336414","https://openalex.org/W2129070077","https://openalex.org/W2129120544","https://openalex.org/W2148154194","https://openalex.org/W2162627003","https://openalex.org/W2404921859","https://openalex.org/W2407024733","https://openalex.org/W2438170160","https://openalex.org/W2602334317","https://openalex.org/W2735405579","https://openalex.org/W2914051064","https://openalex.org/W3106321628","https://openalex.org/W4285719527","https://openalex.org/W4296927107","https://openalex.org/W6605762983","https://openalex.org/W6628987425","https://openalex.org/W6635144179","https://openalex.org/W6679097437","https://openalex.org/W6713136263","https://openalex.org/W6718319860","https://openalex.org/W6735862337","https://openalex.org/W6759033407"],"related_works":["https://openalex.org/W1963976507","https://openalex.org/W2206035908","https://openalex.org/W3119288895","https://openalex.org/W1493012537","https://openalex.org/W2185075503","https://openalex.org/W2162158162","https://openalex.org/W2131673591","https://openalex.org/W4247736853","https://openalex.org/W2104528589","https://openalex.org/W3155728191"],"abstract_inverted_index":{"This":[0,102],"paper":[1],"shows":[2],"that":[3,28,44,66,91],"time":[4],"varying":[5,50],"pitch":[6,31,47,69,96,156],"properties":[7],"can":[8,53],"be":[9,54],"used":[10,110],"advantageously":[11],"within":[12],"the":[13,38,68,83,121,133,149],"segmentation":[14,112],"step":[15],"of":[16,35,58,95,151,161],"a":[17,22,59,80,86,108,153,158,162,172,183],"multi-talker":[18],"diarization":[19],"system.":[20,113],"First":[21],"study":[23],"is":[24,41,48,64,70,75,89,104,117,157],"conducted":[25],"to":[26,79,119,128,174],"verify":[27],"changes":[29,36,178],"in":[30,37,82,132,143,155,165],"are":[32,138],"strong":[33],"indicators":[34],"speaker.":[39,84],"It":[40],"then":[42,73,105],"highlighted":[43],"an":[45],"individual's":[46],"smoothly":[49],"and,":[51],"therefore,":[52],"predicted":[55],"by":[56],"means":[57],"Kalman":[60],"filter.":[61],"Subsequently":[62],"it":[63],"shown":[65,118],"if":[67],"not":[71],"predictable":[72],"this":[74,93,144],"most":[76],"likely":[77],"due":[78],"change":[81,100,123,154,164],"Finally,":[85],"novel":[87],"system":[88,103,116],"proposed":[90,115],"uses":[92],"approach":[94],"prediction":[97],"for":[98],"speaker":[99,122,163,177],"detection.":[101],"evaluated":[106],"against":[107],"commonly":[109],"MFCC":[111],"The":[114],"increase":[120],"detection":[124],"rate":[125],"from":[126],"43.3%":[127],"70.5%":[129],"on":[130,182],"meetings":[131],"AMI":[134],"corpus.":[135,187],"Therefore,":[136],"there":[137],"two":[139],"equally":[140],"weighted":[141],"contributions":[142],"paper:":[145],"1.":[146],"We":[147,170],"address":[148],"question":[150],"whether":[152],"reliable":[159],"estimator":[160],"multi-talk":[166],"meeting":[167,186],"audio.":[168],"2.":[169],"develop":[171],"method":[173],"extract":[175],"such":[176],"and":[179],"test":[180],"them":[181],"widely":[184],"available":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
