{"id":"https://openalex.org/W2889016587","doi":"https://doi.org/10.21437/interspeech.2018-1515","title":"An Improved Deep Embedding Learning Method for Short Duration Speaker Verification","display_name":"An Improved Deep Embedding Learning Method for Short Duration Speaker Verification","publication_year":2018,"publication_date":"2018-08-28","ids":{"openalex":"https://openalex.org/W2889016587","doi":"https://doi.org/10.21437/interspeech.2018-1515","mag":"2889016587"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2018-1515","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1515","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024223042","display_name":"Zhifu Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhifu Gao","raw_affiliation_strings":["National Engineering Laboratory of Speech and Language Information Processing University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory of Speech and Language Information Processing University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100381758","display_name":"Yan Song","orcid":"https://orcid.org/0000-0002-5668-9068"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Song","raw_affiliation_strings":["National Engineering Laboratory of Speech and Language Information Processing University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory of Speech and Language Information Processing University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000620878","display_name":"Ian McLoughlin","orcid":"https://orcid.org/0000-0001-7111-2008"},"institutions":[{"id":"https://openalex.org/I20581793","display_name":"University of Kent","ror":"https://ror.org/00xkeyj56","country_code":"GB","type":"education","lineage":["https://openalex.org/I20581793"]},{"id":"https://openalex.org/I167056439","display_name":"Medway School of Pharmacy","ror":"https://ror.org/00fa9v295","country_code":"GB","type":"education","lineage":["https://openalex.org/I167056439"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ian McLoughlin","raw_affiliation_strings":["School of Computing, University of Kent, Medway, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Kent, Medway, UK","institution_ids":["https://openalex.org/I167056439","https://openalex.org/I20581793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038369085","display_name":"Wu Guo","orcid":"https://orcid.org/0000-0002-3779-7944"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wu Guo","raw_affiliation_strings":["National Engineering Laboratory of Speech and Language Information Processing University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory of Speech and Language Information Processing University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057227915","display_name":"Li-Rong Dai","orcid":"https://orcid.org/0000-0002-0859-2827"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lirong Dai","raw_affiliation_strings":["National Engineering Laboratory of Speech and Language Information Processing University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory of Speech and Language Information Processing University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5024223042"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":2.4433,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.9150442,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3578","last_page":"3582"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7901153564453125},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7280808091163635},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6821568012237549},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.6797824501991272},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.6778564453125},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6769090294837952},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6311709880828857},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6095341444015503},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6055042743682861},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5087869763374329},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.41565680503845215}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7901153564453125},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7280808091163635},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6821568012237549},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.6797824501991272},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.6778564453125},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6769090294837952},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6311709880828857},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6095341444015503},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6055042743682861},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5087869763374329},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.41565680503845215}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2018-1515","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1515","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},{"id":"pmh:oai:kar.kent.ac.uk:67451","is_oa":false,"landing_page_url":"https://doi.org/10.21437/Interspeech.2018-1515>)","pdf_url":null,"source":{"id":"https://openalex.org/S4377196264","display_name":"Kent Academic Repository (University of Kent)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I20581793","host_organization_name":"University of Kent","host_organization_lineage":["https://openalex.org/I20581793"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W123007118","https://openalex.org/W1524333225","https://openalex.org/W1836465849","https://openalex.org/W1960777822","https://openalex.org/W2046056978","https://openalex.org/W2078169166","https://openalex.org/W2121415728","https://openalex.org/W2150769028","https://openalex.org/W2183016404","https://openalex.org/W2395750323","https://openalex.org/W2402146185","https://openalex.org/W2406312423","https://openalex.org/W2519091744","https://openalex.org/W2587150483","https://openalex.org/W2592939477","https://openalex.org/W2612434969","https://openalex.org/W2746742816","https://openalex.org/W2748488820","https://openalex.org/W2749173541","https://openalex.org/W2749938256","https://openalex.org/W2962850830","https://openalex.org/W2963840672","https://openalex.org/W4251033893"],"related_works":["https://openalex.org/W2158491338","https://openalex.org/W2807901368","https://openalex.org/W2133733652","https://openalex.org/W2072658171","https://openalex.org/W2606392311","https://openalex.org/W2320042380","https://openalex.org/W4385956668","https://openalex.org/W2900895161","https://openalex.org/W4380838366","https://openalex.org/W4401096132"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"an":[3],"improved":[4],"deep":[5,21,32],"embedding":[6,74],"learning":[7,75],"method":[8,109],"based":[9,76],"on":[10,77,153,175],"convolutional":[11,126],"neural":[12,33],"networks":[13],"(CNN)":[14],"for":[15,72,86,116],"short-duration":[16],"speaker":[17,39,119],"verification":[18],"(SV).":[19],"Existing":[20],"learning-based":[22],"SV":[23,182],"methods":[24],"generally":[25],"extract":[26],"frontend":[27,73],"embeddings":[28,53],"from":[29,137,166],"a":[30,44,58,95],"feed-forward":[31],"network,":[34],"in":[35],"which":[36],"the":[37,48,78,84,122,131,134,138,143,154],"long-term":[38,118],"characteristics":[40],"are":[41,54,70,91,128,151],"captured":[42],"via":[43,57],"pooling":[45,108],"operation":[46],"over":[47],"input":[49],"speech.":[50],"The":[51],"extracted":[52],"then":[55],"scored":[56],"backend":[59],"model,":[60],"such":[61],"as":[62],"Probabilistic":[63],"Linear":[64],"Discriminative":[65],"Analysis":[66],"(PLDA).":[67],"Two":[68],"improvements":[69],"proposed":[71,147],"CNN":[79],"structure:":[80],"(1)":[81],"Motivated":[82],"by":[83],"WaveNet":[85],"speech":[87],"synthesis,":[88],"dilated":[89],"filters":[90],"designed":[92],"to":[93,112,168],"achieve":[94],"tradeoff":[96],"between":[97],"computational":[98],"efficiency":[99],"and":[100,103,186],"receptive-filter":[101],"size;":[102],"(2)":[104],"A":[105],"novel":[106],"cross-convolutional-layer":[107],"is":[110],"exploited":[111],"capture":[113],"$1^{st}$-order":[114],"statistics":[115],"modelling":[117],"characteristics.":[120],"Specifically,":[121],"activations":[123],"of":[124,133,145,158],"one":[125],"layer":[127],"aggregated":[129],"with":[130,163],"guidance":[132],"feature":[135],"maps":[136],"successive":[139],"layer.":[140],"To":[141],"evaluate":[142],"effectiveness":[144],"our":[146],"methods,":[148],"extensive":[149],"experiments":[150],"conducted":[152],"modified":[155],"female":[156],"portion":[157],"NIST":[159],"SRE":[160],"2010":[161],"evaluations,":[162],"conditions":[164],"ranging":[165],"10s-10s":[167],"5s-4s.":[169],"Excellent":[170],"performance":[171],"has":[172],"been":[173],"achieved":[174],"each":[176],"evaluation":[177],"condition,":[178],"significantly":[179],"outperforming":[180],"existing":[181],"systems":[183],"using":[184],"i-vector":[185],"d-vector":[187],"embeddings.":[188]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
