{"id":"https://openalex.org/W2967957380","doi":"https://doi.org/10.21437/interspeech.2019-1891","title":"A Study of x-Vector Based Speaker Recognition on Short Utterances","display_name":"A Study of x-Vector Based Speaker Recognition on Short Utterances","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2967957380","doi":"https://doi.org/10.21437/interspeech.2019-1891","mag":"2967957380"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-1891","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.qut.edu.au/132058/1/short-utt-xvector.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023434638","display_name":"Ahilan Kanagasundaram","orcid":"https://orcid.org/0000-0002-0533-7986"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]},{"id":"https://openalex.org/I198412587","display_name":"University of Jaffna","ror":"https://ror.org/02fwjgw17","country_code":"LK","type":"education","lineage":["https://openalex.org/I198412587"]}],"countries":["AU","LK"],"is_corresponding":true,"raw_author_name":"A. Kanagasundaram","raw_affiliation_strings":["Department of Electrical & Electronic Engineering, University of Jaffna, Sri Lanka","Speech and Audio Research Lab, SAIVT, Queensland University of Technology, Australia"],"affiliations":[{"raw_affiliation_string":"Department of Electrical & Electronic Engineering, University of Jaffna, Sri Lanka","institution_ids":["https://openalex.org/I198412587"]},{"raw_affiliation_string":"Speech and Audio Research Lab, SAIVT, Queensland University of Technology, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055128383","display_name":"Sridha Sridharan","orcid":"https://orcid.org/0000-0003-4316-9001"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"S. Sridharan","raw_affiliation_strings":["Speech and Audio Research Lab, SAIVT, Queensland University of Technology, Australia"],"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Lab, SAIVT, Queensland University of Technology, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108477321","display_name":"G. Sriram","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"G. Sriram","raw_affiliation_strings":["LEAP Lab, Indian Institute of Science, India"],"affiliations":[{"raw_affiliation_string":"LEAP Lab, Indian Institute of Science, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034229169","display_name":"S Tambe Prachi","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"S. Prachi","raw_affiliation_strings":["LEAP Lab, Indian Institute of Science, India"],"affiliations":[{"raw_affiliation_string":"LEAP Lab, Indian Institute of Science, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034095159","display_name":"Clinton Fookes","orcid":"https://orcid.org/0000-0002-8515-6324"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"C. Fookes","raw_affiliation_strings":["Speech and Audio Research Lab, SAIVT, Queensland University of Technology, Australia"],"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Lab, SAIVT, Queensland University of Technology, Australia","institution_ids":["https://openalex.org/I160993911"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5023434638"],"corresponding_institution_ids":["https://openalex.org/I160993911","https://openalex.org/I198412587"],"apc_list":null,"apc_paid":null,"fwci":2.4533,"has_fulltext":true,"cited_by_count":32,"citation_normalized_percentile":{"value":0.91711755,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2943","last_page":"2947"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9488000273704529,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7310121059417725},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6989972591400146},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6865798234939575},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.5038275122642517},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.43142184615135193},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42528074979782104},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38531389832496643},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3449063301086426}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7310121059417725},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6989972591400146},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6865798234939575},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.5038275122642517},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.43142184615135193},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42528074979782104},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38531389832496643},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3449063301086426}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2019-1891","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.qut.edu.au:132058","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.qut.edu.au/132058/1/short-utt-xvector.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 20th Annual Conference of the International Speech Communication Association, INTERSPEECH 2019. Vol. 2019-September.","raw_type":"Chapter in Book, Report or Conference volume"}],"best_oa_location":{"id":"pmh:oai:eprints.qut.edu.au:132058","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.qut.edu.au/132058/1/short-utt-xvector.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 20th Annual Conference of the International Speech Communication Association, INTERSPEECH 2019. Vol. 2019-September.","raw_type":"Chapter in Book, Report or Conference volume"},"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2967957380.pdf","grobid_xml":"https://content.openalex.org/works/W2967957380.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W67277430","https://openalex.org/W97072897","https://openalex.org/W123007118","https://openalex.org/W1524333225","https://openalex.org/W1528954144","https://openalex.org/W2183001115","https://openalex.org/W2290689761","https://openalex.org/W2395750323","https://openalex.org/W2406312423","https://openalex.org/W2587150483","https://openalex.org/W2612380871","https://openalex.org/W2612434969","https://openalex.org/W2748488820","https://openalex.org/W2890964092","https://openalex.org/W2962853205","https://openalex.org/W3016884365","https://openalex.org/W3037288542"],"related_works":["https://openalex.org/W2253551762","https://openalex.org/W1521049138","https://openalex.org/W1516392727","https://openalex.org/W2140022733","https://openalex.org/W3048612421","https://openalex.org/W2911612049","https://openalex.org/W2160753975","https://openalex.org/W2150532155","https://openalex.org/W3012250009","https://openalex.org/W2378903289"],"abstract_inverted_index":{"The":[0,32,136],"aim":[1],"of":[2,100,144,150],"this":[3],"work":[4],"is":[5,54,111,129],"to":[6],"gain":[7],"insights":[8],"into":[9],"how":[10],"the":[11,36,51,63,101,113,126,139,148,158],"deep":[12],"neural":[13],"network":[14],"(DNN)":[15],"models":[16],"should":[17,153],"be":[18,40,94,154],"trained":[19,130],"for":[20,45,157,168],"short":[21,46,84,132],"utterance":[22,47,133],"evaluation":[23,48,123,151],"conditions":[24],"in":[25],"an":[26],"x-vector":[27,64,114],"based":[28],"speaker":[29,37,52,90],"verification":[30],"system.":[31],"study":[33],"suggests":[34],"that":[35,82,112,142,171],"embedding":[38,53],"can":[39,93],"extracted":[41,55,95],"with":[42],"reduced":[43],"dimensions":[44],"conditions.":[49,79],"When":[50],"from":[56,96],"deeper":[57,98],"layer":[58,99],"which":[59,103],"has":[60],"lower":[61],"dimension,":[62],"system":[65,115],"achieves":[66,116],"14%":[67],"relative":[68,118,175],"improvement":[69,119,176],"over":[70,179],"baseline":[71,180],"approach":[72,167],"on":[73,75,120,177],"EER":[74,178],"NIST2010":[76,121],"5sec-5sec":[77,122],"truncated":[78],"We":[80],"surmise":[81],"since":[83],"utterances":[85,146,152],"have":[86],"less":[87,105],"phonetic":[88,106],"information":[89],"discriminative":[91],"x-vectors":[92],"a":[97,164,173],"DNN":[102],"captures":[104],"information.":[107],"Another":[108],"interesting":[109],"finding":[110],"5%":[117],"condition":[124],"when":[125],"back-end":[127],"PLDA":[128,169],"using":[131],"development":[134,145],"data.":[135],"results":[137],"confirms":[138],"intuitive":[140],"expectation":[141],"duration":[143,149,159],"and":[147],"matched.":[155],"Finally,":[156],"mismatch":[160],"condition,":[161],"we":[162],"propose":[163],"variance":[165],"normalization":[166],"training":[170],"provides":[172],"4%":[174],"approach.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":8}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
