{"id":"https://openalex.org/W2319316830","doi":"https://doi.org/10.1109/taslp.2016.2544660","title":"Improving Short Utterance Speaker Recognition by Modeling Speech Unit Classes","display_name":"Improving Short Utterance Speaker Recognition by Modeling Speech Unit Classes","publication_year":2016,"publication_date":"2016-03-21","ids":{"openalex":"https://openalex.org/W2319316830","doi":"https://doi.org/10.1109/taslp.2016.2544660","mag":"2319316830"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2016.2544660","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2016.2544660","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050701255","display_name":"Lantian Li","orcid":"https://orcid.org/0000-0003-4274-7930"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lantian Li","raw_affiliation_strings":["Center for Speech and Language Technologies, Tsinghua National Laboratory for Information Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua National Laboratory for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100391494","display_name":"Dong Wang","orcid":"https://orcid.org/0000-0002-6992-7950"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Wang","raw_affiliation_strings":["Center for Speech and Language Technologies, Tsinghua National Laboratory for Information Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua National Laboratory for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101465981","display_name":"Chenhao Zhang","orcid":"https://orcid.org/0000-0002-4463-7703"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenhao Zhang","raw_affiliation_strings":["Center for Speech and Language Technologies, Tsinghua National Laboratory for Information Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua National Laboratory for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084318285","display_name":"Thomas Fang Zheng","orcid":"https://orcid.org/0000-0002-0249-4767"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Thomas Fang Zheng","raw_affiliation_strings":["Center for Speech and Language Technologies, Tsinghua National Laboratory for Information Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua National Laboratory for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050701255"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":11.5688,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.98413918,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"24","issue":"6","first_page":"1129","last_page":"1139"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7667050361633301},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7500090599060059},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.7127959728240967},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.696192741394043},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5484215021133423},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.49945569038391113},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.4889570474624634},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3545166850090027},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3351028263568878},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.291991651058197}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7667050361633301},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7500090599060059},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.7127959728240967},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.696192741394043},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5484215021133423},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.49945569038391113},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.4889570474624634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3545166850090027},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3351028263568878},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.291991651058197},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2016.2544660","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2016.2544660","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8299999833106995,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G803898012","display_name":null,"funder_award_id":"61371136","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8204283577","display_name":null,"funder_award_id":"61271389","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W6173268","https://openalex.org/W67277430","https://openalex.org/W93148613","https://openalex.org/W102958777","https://openalex.org/W112821631","https://openalex.org/W1486632395","https://openalex.org/W1503813490","https://openalex.org/W1524333225","https://openalex.org/W1586405805","https://openalex.org/W1599623585","https://openalex.org/W1916834241","https://openalex.org/W1985690171","https://openalex.org/W1993482042","https://openalex.org/W2002342963","https://openalex.org/W2033436836","https://openalex.org/W2039057510","https://openalex.org/W2041823554","https://openalex.org/W2046056978","https://openalex.org/W2064630946","https://openalex.org/W2068562110","https://openalex.org/W2069883713","https://openalex.org/W2069976350","https://openalex.org/W2100969003","https://openalex.org/W2107638917","https://openalex.org/W2119468495","https://openalex.org/W2121750345","https://openalex.org/W2121812409","https://openalex.org/W2122766873","https://openalex.org/W2125636736","https://openalex.org/W2129244720","https://openalex.org/W2146871184","https://openalex.org/W2150341604","https://openalex.org/W2150769028","https://openalex.org/W2152402526","https://openalex.org/W2154278880","https://openalex.org/W2158069733","https://openalex.org/W2163065939","https://openalex.org/W2166111255","https://openalex.org/W2183016404","https://openalex.org/W2186517251","https://openalex.org/W2186774823","https://openalex.org/W2201142001","https://openalex.org/W2363927127","https://openalex.org/W2398362606","https://openalex.org/W2917756095","https://openalex.org/W2972712416","https://openalex.org/W3037288542","https://openalex.org/W3045485643","https://openalex.org/W3147161844","https://openalex.org/W4205947740","https://openalex.org/W6600258082","https://openalex.org/W6602762607","https://openalex.org/W6603823966","https://openalex.org/W6604530394","https://openalex.org/W6628849183","https://openalex.org/W6630285273","https://openalex.org/W6631362777","https://openalex.org/W6635953567","https://openalex.org/W6640010188","https://openalex.org/W6678071057","https://openalex.org/W6684372933","https://openalex.org/W6686491854","https://openalex.org/W6686645966","https://openalex.org/W6686850381","https://openalex.org/W6687645958","https://openalex.org/W6712660717","https://openalex.org/W6760207327","https://openalex.org/W6983670251"],"related_works":["https://openalex.org/W2309273277","https://openalex.org/W2061937230","https://openalex.org/W1574295218","https://openalex.org/W2405439032","https://openalex.org/W2132658536","https://openalex.org/W2070212102","https://openalex.org/W2544241817","https://openalex.org/W2158882055","https://openalex.org/W2914603036","https://openalex.org/W2124470186"],"abstract_inverted_index":{"Short":[0],"utterance":[1],"speaker":[2,141],"recognition":[3,142],"(SUSR)":[4],"is":[5,83],"highly":[6],"challenging":[7],"due":[8],"to":[9,25,35,85,117],"the":[10,19,26,31,37,74,145,160,176,183,187],"limited":[11],"enrollment":[12,45,197],"and/or":[13],"test":[14,146],"data.":[15],"We":[16],"argue":[17],"that":[18,54,64,137],"difficulty":[20],"can":[21,99,189],"be":[22,100,190],"largely":[23],"attributed":[24],"mismatched":[27],"prior":[28],"distributions":[29],"of":[30,61],"speech":[32,56,68,87,90,121,203],"data":[33,78,198],"used":[34],"train":[36],"universal":[38],"background":[39],"model":[40,107,184],"(UBM)":[41],"and":[42,46,70,154],"those":[43],"for":[44,201],"test.":[47],"This":[48],"paper":[49],"presents":[50],"a":[51,59,80,106,129,139,165],"novel":[52],"solution":[53],"distributes":[55],"signals":[57],"into":[58,89],"multitude":[60],"acoustic":[62],"subregions":[63],"are":[65,148,199],"defined":[66],"by":[67],"units,":[69],"models":[71,98],"speakers":[72],"within":[73],"subregions.":[75],"To":[76],"avoid":[77],"sparsity,":[79],"data-driven":[81],"approach":[82,109],"proposed":[84,161],"cluster":[86],"units":[88],"unit":[91,122,204],"classes,":[92],"based":[93,110],"on":[94,111,128,138],"which":[95],"robust":[96],"subregion":[97,162],"constructed.":[101],"Further":[102],"more,":[103],"we":[104],"propose":[105],"synthesis":[108,185],"maximum":[112],"likelihood":[113],"linear":[114],"regression":[115],"(MLLR)":[116],"deal":[118],"with":[119,175,182],"no-data":[120],"classes.":[123,205],"The":[124,134],"experiments":[125],"were":[126],"conducted":[127],"publicly":[130],"available":[131,200],"database":[132],"SUD12.":[133],"results":[135],"demonstrated":[136],"text-independent":[140],"task":[143],"where":[144,195],"utterances":[147],"no":[149,196],"longer":[150],"than":[151,157],"2":[152],"seconds":[153],"mostly":[155],"shorter":[156],"0.5":[158],"seconds,":[159],"modeling":[163],"offered":[164],"21.51%":[166],"relative":[167],"reduction":[168],"in":[169,193],"equal":[170],"error":[171],"rate":[172],"(EER),":[173],"compared":[174],"standard":[177],"GMM-UBM":[178],"baseline.":[179],"In":[180],"addition,":[181],"approach,":[186],"performance":[188],"greatly":[191],"improved":[192],"scenarios":[194],"some":[202]},"counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":7},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
