{"id":"https://openalex.org/W4375869221","doi":"https://doi.org/10.1109/icassp49357.2023.10096449","title":"In Search of Strong Embedding Extractors for Speaker Diarisation","display_name":"In Search of Strong Embedding Extractors for Speaker Diarisation","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869221","doi":"https://doi.org/10.1109/icassp49357.2023.10096449"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096449","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096449","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091834525","display_name":"Jee-weon Jung","orcid":"https://orcid.org/0000-0003-0505-2988"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jee-Weon Jung","raw_affiliation_strings":["NAVER Corporation,South Korea","NAVER Corporation, South Korea","NAVER Cloud Corporation, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Corporation,South Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Corporation, South Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Cloud Corporation, South Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070613375","display_name":"Hee-Soo Heo","orcid":"https://orcid.org/0000-0003-1567-123X"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hee-Soo Heo","raw_affiliation_strings":["NAVER Cloud Corporation,South Korea","NAVER Cloud Corporation, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Cloud Corporation,South Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Cloud Corporation, South Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086369397","display_name":"Bong\u2010Jin Lee","orcid":"https://orcid.org/0000-0001-7896-2961"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Bong-Jin Lee","raw_affiliation_strings":["NAVER Cloud Corporation,South Korea","NAVER Cloud Corporation, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Cloud Corporation,South Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Cloud Corporation, South Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114067866","display_name":"Jaesung Huh","orcid":"https://orcid.org/0000-0001-7247-6401"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210155701","display_name":"Geomechanica (Canada)","ror":"https://ror.org/0534wbh79","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210155701"]}],"countries":["CA","GB"],"is_corresponding":false,"raw_author_name":"Jaesung Huh","raw_affiliation_strings":["University of Oxford,Visual Geometry Group,Department of Engineering Science,UK","Department of Engineering Science, Visual Geometry Group, University of Oxford, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oxford,Visual Geometry Group,Department of Engineering Science,UK","institution_ids":["https://openalex.org/I4210155701"]},{"raw_affiliation_string":"Department of Engineering Science, Visual Geometry Group, University of Oxford, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084345521","display_name":"Andrew J. Brown","orcid":"https://orcid.org/0000-0002-4475-0116"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210155701","display_name":"Geomechanica (Canada)","ror":"https://ror.org/0534wbh79","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210155701"]}],"countries":["CA","GB"],"is_corresponding":false,"raw_author_name":"Andrew Brown","raw_affiliation_strings":["University of Oxford,Visual Geometry Group,Department of Engineering Science,UK","Department of Engineering Science, Visual Geometry Group, University of Oxford, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oxford,Visual Geometry Group,Department of Engineering Science,UK","institution_ids":["https://openalex.org/I4210155701"]},{"raw_affiliation_string":"Department of Engineering Science, Visual Geometry Group, University of Oxford, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067505925","display_name":"Youngki Kwon","orcid":null},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Youngki Kwon","raw_affiliation_strings":["NAVER Cloud Corporation,South Korea","NAVER Cloud Corporation, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Cloud Corporation,South Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"NAVER Cloud Corporation, South Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University,Pittsburgh,PA,USA","Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038723822","display_name":"Joon Son Chung","orcid":"https://orcid.org/0000-0001-7741-7275"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Joon Son Chung","raw_affiliation_strings":["Korea Advanced Institute of Science and Technology,South Korea","Korea Advanced Institute of Science and Technology, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Korea Advanced Institute of Science and Technology,South Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Korea Advanced Institute of Science and Technology, South Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5091834525"],"corresponding_institution_ids":["https://openalex.org/I60922564"],"apc_list":null,"apc_paid":null,"fwci":1.481,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.853777,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.8092599511146545},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8027292490005493},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6945438981056213},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6127861142158508},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.6049118638038635},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5970263481140137},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5599671602249146},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3745369017124176}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.8092599511146545},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8027292490005493},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6945438981056213},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6127861142158508},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.6049118638038635},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5970263481140137},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5599671602249146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3745369017124176},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096449","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096449","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1591607137","https://openalex.org/W2035890032","https://openalex.org/W2038101708","https://openalex.org/W2081074144","https://openalex.org/W2083751884","https://openalex.org/W2097583714","https://openalex.org/W2132914434","https://openalex.org/W2638067502","https://openalex.org/W2726515241","https://openalex.org/W2808631503","https://openalex.org/W2896538040","https://openalex.org/W2928165649","https://openalex.org/W2942551338","https://openalex.org/W2972949456","https://openalex.org/W3015780472","https://openalex.org/W3015783745","https://openalex.org/W3024869864","https://openalex.org/W3025075133","https://openalex.org/W3025260599","https://openalex.org/W3038871978","https://openalex.org/W3095212884","https://openalex.org/W3097777922","https://openalex.org/W3098454764","https://openalex.org/W3147778289","https://openalex.org/W3160044950","https://openalex.org/W3162347631","https://openalex.org/W3178462146","https://openalex.org/W3196595845","https://openalex.org/W3197331597","https://openalex.org/W3204593031","https://openalex.org/W3212886388","https://openalex.org/W4214556932","https://openalex.org/W4221154745","https://openalex.org/W4296069292","https://openalex.org/W4297841362","https://openalex.org/W4297841773","https://openalex.org/W4307076610","https://openalex.org/W4375839990","https://openalex.org/W6845925762"],"related_works":["https://openalex.org/W4389984014","https://openalex.org/W2144208207","https://openalex.org/W1509309911","https://openalex.org/W1599425004","https://openalex.org/W2118860825","https://openalex.org/W2096510939","https://openalex.org/W2144470400","https://openalex.org/W1516392727","https://openalex.org/W2140022733","https://openalex.org/W2911612049"],"abstract_inverted_index":{"Speaker":[0],"embedding":[1,74,133,166],"extractors":[2,75,134,167],"(EEs),":[3],"which":[4,32,81],"map":[5],"input":[6],"audio":[7],"to":[8,69,127],"a":[9],"speaker":[10,19,50,62,91,98,111,140,165],"discriminant":[11],"latent":[12],"space,":[13],"are":[14,23,87,173],"of":[15,94,136],"paramount":[16],"importance":[17],"in":[18,80,90],"diarisation.":[20,53],"However,":[21],"there":[22],"several":[24],"challenges":[25],"when":[26],"adopting":[27],"EEs":[28],"for":[29],"diarisation,":[30],"from":[31],"we":[33,109],"tackle":[34],"two":[35,123,155],"key":[36],"problems.":[37],"First,":[38],"the":[39,45,102,106,118,129,150],"evaluation":[40,64,113],"is":[41],"not":[42,67,77],"straightforward":[43],"because":[44,93],"required":[46],"features":[47],"differ":[48],"between":[49],"verification":[51,63,112],"and":[52,97,149],"We":[54,121],"show":[55],"that":[56,115,169],"better":[57,70,116],"performance":[58],"on":[59],"widely":[60],"adopted":[61],"protocols":[65,114],"does":[66],"lead":[68],"diarisation":[71,92,119],"performance.":[72,103],"Second,":[73],"have":[76],"seen":[78],"utterances":[79],"multiple":[82],"speakers":[83,156],"exist.":[84],"These":[85],"inputs":[86],"inevitably":[88],"present":[89],"overlapped":[95,137,146],"speech":[96,138,147],"changes;":[99],"they":[100],"degrade":[101],"To":[104],"mitigate":[105],"first":[107],"problem,":[108,131],"generate":[110],"mimic":[117],"scenario.":[120],"propose":[122],"data":[124],"augmentation":[125],"techniques":[126],"alleviate":[128],"second":[130],"making":[132],"aware":[135],"or":[139],"change":[141],"input.":[142],"One":[143],"technique":[144],"generates":[145,152],"segments,":[148],"other":[151],"segments":[153],"where":[154],"utter":[157],"sequentially.":[158],"Extensive":[159],"experimental":[160],"results":[161],"using":[162],"three":[163],"state-of-the-art":[164],"demonstrate":[168],"both":[170],"proposed":[171],"approaches":[172],"effective.":[174]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6}],"updated_date":"2026-06-02T09:04:35.204637","created_date":"2025-10-10T00:00:00"}
