{"id":"https://openalex.org/W2405137844","doi":"https://doi.org/10.1109/icassp.2016.7472638","title":"Online speaker diarization using adapted i-vector transforms","display_name":"Online speaker diarization using adapted i-vector transforms","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2405137844","doi":"https://doi.org/10.1109/icassp.2016.7472638","mag":"2405137844"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7472638","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472638","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103551993","display_name":"Weizhong Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Weizhong Zhu","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029030495","display_name":"Jason Pelecanos","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Pelecanos","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5103551993"],"corresponding_institution_ids":["https://openalex.org/I1341412227"],"apc_list":null,"apc_paid":null,"fwci":4.79190694,"has_fulltext":false,"cited_by_count":39,"citation_normalized_percentile":{"value":0.97647925,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5045","last_page":"5049"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.9502372145652771},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7325369119644165},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6712538003921509},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6049175262451172},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5594574213027954},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.4932333827018738},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.47167760133743286},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46939319372177124},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.4277627170085907},{"id":"https://openalex.org/keywords/principal-component-analysis","display_name":"Principal component analysis","score":0.42481809854507446},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42367714643478394},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14322149753570557}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.9502372145652771},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7325369119644165},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6712538003921509},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6049175262451172},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5594574213027954},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.4932333827018738},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.47167760133743286},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46939319372177124},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.4277627170085907},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.42481809854507446},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42367714643478394},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14322149753570557},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2016.7472638","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472638","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W67702164","https://openalex.org/W164219236","https://openalex.org/W1482605500","https://openalex.org/W1484181928","https://openalex.org/W1512532673","https://openalex.org/W1916834241","https://openalex.org/W1965819578","https://openalex.org/W2038101708","https://openalex.org/W2041823554","https://openalex.org/W2081074144","https://openalex.org/W2100969003","https://openalex.org/W2121812409","https://openalex.org/W2149220986","https://openalex.org/W2150769028","https://openalex.org/W2159591770","https://openalex.org/W2174668315","https://openalex.org/W2404786457","https://openalex.org/W6602779673","https://openalex.org/W6628802092","https://openalex.org/W6628911050","https://openalex.org/W6640010188","https://openalex.org/W6670935036","https://openalex.org/W6713438330"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2149220986","https://openalex.org/W1493012537","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1999004162","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2023466863","https://openalex.org/W2696990509"],"abstract_inverted_index":{"Many":[0],"speaker":[1,61,84,168],"diarization":[2,85,169],"systems":[3,10],"operate":[4],"in":[5,63,89,140],"an":[6,82,141],"off-line":[7],"mode.":[8],"Such":[9,23],"typically":[11],"find":[12],"homogeneous":[13],"segments":[14,19,38,113,136],"and":[15],"then":[16],"cluster":[17],"these":[18],"according":[20],"to":[21,58,115,122,183],"speaker.":[22],"algorithms,":[24],"like":[25],"bottom-up":[26],"clustering,":[27,31],"k-means":[28],"or":[29,137],"spectral":[30],"generally":[32],"require":[33],"the":[34,97,102,148,151,177],"registration":[35],"of":[36,105,107],"all":[37],"before":[39],"clustering":[40],"can":[41,129],"begin.":[42],"However,":[43,125],"for":[44,171],"real-time":[45],"applications":[46],"such":[47],"as":[48,154],"with":[49,150,163],"multi-person":[50],"voice":[51],"interactive":[52],"systems,":[53],"there":[54,133],"is":[55],"a":[56,64,73,76,90,117,172],"need":[57],"perform":[59],"online":[60,142],"assignment":[62],"strict":[65,91,173],"left-to-right":[66,92,174],"fashion.":[67,93],"In":[68],"this":[69,127],"paper":[70],"we":[71,165],"propose":[72],"novel":[74],"Maximum":[75],"Posteriori":[77],"(MAP)":[78],"adapted":[79],"transform":[80],"within":[81],"i-vector":[83],"framework,":[86],"that":[87,101],"operates":[88],"Previous":[94],"work":[95],"by":[96,120],"community":[98],"has":[99],"shown":[100],"principal":[103],"components":[104],"variation":[106],"fixed":[108],"dimensional":[109],"i-vectors":[110,156],"learned":[111],"across":[112],"tend":[114],"indicate":[116],"strong":[118],"basis":[119,128],"which":[121],"separate":[123],"speakers.":[124],"determining":[126],"be":[130],"problematic":[131],"when":[132,138],"are":[134,157],"few":[135],"operating":[139],"manner.":[143],"The":[144],"proposed":[145],"method":[146],"blends":[147],"prior":[149],"estimated":[152],"subspace":[153],"more":[155],"observed.":[158],"Given":[159],"oracle":[160],"SAD":[161],"segments,":[162],"adaptation":[164],"achieve":[166],"3.2%":[167],"error":[170],"constraint":[175],"on":[176],"LDC":[178],"Callhome":[179],"English":[180],"Corpus":[181],"compared":[182],"4.8%":[184],"without":[185],"adaptation.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
