{"id":"https://openalex.org/W4377971325","doi":"https://doi.org/10.1109/lsp.2023.3279781","title":"Multi-Target Extractor and Detector for Unknown-Number Speaker Diarization","display_name":"Multi-Target Extractor and Detector for Unknown-Number Speaker Diarization","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4377971325","doi":"https://doi.org/10.1109/lsp.2023.3279781"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2023.3279781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2023.3279781","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102858202","display_name":"Chin-Yi Cheng","orcid":"https://orcid.org/0009-0004-2088-4058"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Chin-Yi Cheng","raw_affiliation_strings":["National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048338308","display_name":"Hung-Shin Lee","orcid":"https://orcid.org/0000-0001-7044-9434"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-Shin Lee","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044008055","display_name":"Yu Tsao","orcid":"https://orcid.org/0000-0001-6956-0418"},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu Tsao","raw_affiliation_strings":["Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071214181","display_name":"Hsin\u2010Min Wang","orcid":"https://orcid.org/0000-0003-3599-5071"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsin-Min Wang","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102858202"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":1.7483,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.87336406,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"30","issue":null,"first_page":"638","last_page":"642"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.9137982130050659},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7393882274627686},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6696258187294006},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6267236471176147},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5494765639305115},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.49135151505470276},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.48185810446739197},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.4692307114601135},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.46647483110427856},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.46098241209983826},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.45115649700164795},{"id":"https://openalex.org/keywords/speaker-identification","display_name":"Speaker identification","score":0.42179161310195923},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3971739113330841},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35502007603645325},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3503836393356323},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11034786701202393}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.9137982130050659},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7393882274627686},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6696258187294006},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6267236471176147},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5494765639305115},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.49135151505470276},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.48185810446739197},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.4692307114601135},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.46647483110427856},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.46098241209983826},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.45115649700164795},{"id":"https://openalex.org/C2986627078","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker identification","level":3,"score":0.42179161310195923},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3971739113330841},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35502007603645325},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3503836393356323},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11034786701202393},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2023.3279781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2023.3279781","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1589137271","https://openalex.org/W1606894530","https://openalex.org/W1965819578","https://openalex.org/W2038101708","https://openalex.org/W2065427498","https://openalex.org/W2081074144","https://openalex.org/W2115130489","https://openalex.org/W2121812409","https://openalex.org/W2150769028","https://openalex.org/W2169353679","https://openalex.org/W2460742184","https://openalex.org/W2638067502","https://openalex.org/W2794506738","https://openalex.org/W2805869973","https://openalex.org/W2889418727","https://openalex.org/W2890964092","https://openalex.org/W2896538040","https://openalex.org/W2902864383","https://openalex.org/W2952752702","https://openalex.org/W2962788625","https://openalex.org/W2972449503","https://openalex.org/W2972680151","https://openalex.org/W2972949456","https://openalex.org/W2988502885","https://openalex.org/W3008357631","https://openalex.org/W3010196324","https://openalex.org/W3015199127","https://openalex.org/W3015780472","https://openalex.org/W3016031604","https://openalex.org/W3025260599","https://openalex.org/W3095212884","https://openalex.org/W3133834828","https://openalex.org/W3163019736","https://openalex.org/W3196595845","https://openalex.org/W4214556932","https://openalex.org/W4221165661","https://openalex.org/W4225792560","https://openalex.org/W4289656378","https://openalex.org/W4297841865","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6774558098","https://openalex.org/W6840066292"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W4318225096","https://openalex.org/W106647055","https://openalex.org/W2545131480","https://openalex.org/W2087341511","https://openalex.org/W1521049138","https://openalex.org/W4247736853","https://openalex.org/W2997340161","https://openalex.org/W1964028329","https://openalex.org/W2136038945"],"abstract_inverted_index":{"Strong":[0],"representations":[1,32],"of":[2,43,51,54,100],"target":[3],"speakers":[4,11,55,114],"can":[5],"help":[6],"extract":[7],"important":[8],"information":[9],"about":[10],"and":[12,39,65,74,80],"detect":[13],"corresponding":[14],"temporal":[15,79],"regions":[16],"in":[17,56,77,107],"multi-speaker":[18],"conversations.":[19],"In":[20],"this":[21],"study,":[22],"we":[23],"propose":[24],"a":[25,47,57,66,71,86,108],"neural":[26],"architecture":[27],"that":[28,95,121],"simultaneously":[29],"extracts":[30],"speaker":[31,36,45,60,81],"consistent":[33],"with":[34,112],"the":[35,41,52,91,101],"diarization":[37,129],"objective":[38],"detects":[40],"presence":[42],"each":[44],"on":[46,90],"frame-by-frame":[48],"basis":[49],"regardless":[50],"number":[53],"conversation.":[58],"A":[59],"representation":[61],"(called":[62],"z-vector)":[63],"extractor":[64],"time-speaker":[67],"contextualizer,":[68],"implemented":[69],"by":[70],"residual":[72],"network":[73],"processing":[75],"data":[76],"both":[78],"dimensions,":[82],"are":[83],"integrated":[84],"into":[85],"unified":[87],"framework.":[88],"Tests":[89],"CALLHOME":[92],"corpus":[93],"show":[94,120],"our":[96,122],"model":[97,123],"outperforms":[98],"most":[99],"methods":[102],"proposed":[103],"so":[104],"far.":[105],"Evaluations":[106],"more":[109],"challenging":[110],"case":[111],"simultaneous":[113],"ranging":[115],"from":[116],"2":[117],"to":[118,126],"7":[119],"achieves":[124],"6.4%":[125],"30.9%":[127],"relative":[128],"error":[130],"rate":[131],"reductions":[132],"over":[133],"several":[134],"typical":[135],"baselines.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2023-05-25T00:00:00"}
