{"id":"https://openalex.org/W2963382494","doi":"https://doi.org/10.1109/icassp.2019.8683245","title":"Gaussian-constrained Training for Speaker Verification","display_name":"Gaussian-constrained Training for Speaker Verification","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2963382494","doi":"https://doi.org/10.1109/icassp.2019.8683245","mag":"2963382494"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8683245","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8683245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101744768","display_name":"Lantian Li","orcid":"https://orcid.org/0000-0002-5546-8060"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lantian Li","raw_affiliation_strings":["Center for Speech and Language Technologies, RIIT, Tsinghua University, China Beijing National Research Center for Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, RIIT, Tsinghua University, China Beijing National Research Center for Information Science and Technology","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056437232","display_name":"Zhiyuan Tang","orcid":"https://orcid.org/0000-0002-3786-7690"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyuan Tang","raw_affiliation_strings":["Center for Speech and Language Technologies, RIIT, Tsinghua University, China Beijing National Research Center for Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, RIIT, Tsinghua University, China Beijing National Research Center for Information Science and Technology","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102001805","display_name":"Ying Shi","orcid":"https://orcid.org/0000-0001-8445-2300"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Shi","raw_affiliation_strings":["Center for Speech and Language Technologies, RIIT, Tsinghua University, China Beijing National Research Center for Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, RIIT, Tsinghua University, China Beijing National Research Center for Information Science and Technology","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100391494","display_name":"Dong Wang","orcid":"https://orcid.org/0000-0002-6992-7950"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Wang","raw_affiliation_strings":["Center for Speech and Language Technologies, RIIT, Tsinghua University, China Beijing National Research Center for Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, RIIT, Tsinghua University, China Beijing National Research Center for Information Science and Technology","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101744768"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":3.68682589,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.9399035,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6036","last_page":"6040"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.7765711545944214},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7747413516044617},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.655919075012207},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6557776927947998},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5922452211380005},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.5412876009941101},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5360264182090759},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.5312148332595825},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5072478652000427},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4726742208003998},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.469032883644104},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4643877446651459},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4491075873374939},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34928494691848755},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1150626540184021}],"concepts":[{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.7765711545944214},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7747413516044617},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.655919075012207},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6557776927947998},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5922452211380005},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.5412876009941101},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5360264182090759},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.5312148332595825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5072478652000427},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4726742208003998},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.469032883644104},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4643877446651459},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4491075873374939},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34928494691848755},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1150626540184021},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2019.8683245","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8683245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W1589137271","https://openalex.org/W2039057510","https://openalex.org/W2041823554","https://openalex.org/W2046056978","https://openalex.org/W2107638917","https://openalex.org/W2114925438","https://openalex.org/W2150769028","https://openalex.org/W2183016404","https://openalex.org/W2219249508","https://openalex.org/W2516764878","https://openalex.org/W2584329820","https://openalex.org/W2587150483","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2748488820","https://openalex.org/W2890964092","https://openalex.org/W2962832278","https://openalex.org/W2963670602","https://openalex.org/W4234330420","https://openalex.org/W6631362777","https://openalex.org/W6635152626","https://openalex.org/W6688816777"],"related_works":["https://openalex.org/W1516392727","https://openalex.org/W4246918790","https://openalex.org/W4254799911","https://openalex.org/W2140735993","https://openalex.org/W133936345","https://openalex.org/W4313224873","https://openalex.org/W4313417806","https://openalex.org/W2963382494","https://openalex.org/W2900405446","https://openalex.org/W2336335760"],"abstract_inverted_index":{"Neural":[0],"models,":[1],"in":[2,42],"particular":[3],"the":[4,51,57,65,68,77,80,96,102,105,115],"d-vector":[5],"and":[6,99,117,129],"x-vector":[7],"architectures,":[8],"have":[9],"produced":[10,126],"state-of-the-art":[11],"performance":[12,78,136],"on":[13,114],"many":[14],"speaker":[15,70,107,131],"verification":[16],"tasks.":[17],"However,":[18],"two":[19],"potential":[20],"problems":[21],"of":[22,67,79,104],"these":[23,60],"neural":[24],"models":[25,31,61],"deserve":[26],"more":[27,127],"investigation.":[28],"Firstly,":[29],"both":[30],"suffer":[32],"from":[33],"`information":[34],"leak',":[35],"which":[36],"means":[37],"that":[38,53,93,121],"some":[39],"parameters":[40],"participating":[41],"model":[43],"training":[44,91,124],"will":[45],"be":[46,110],"discarded":[47],"during":[48],"inference,":[49],"i.e,":[50],"layers":[52],"are":[54],"used":[55],"as":[56],"classifier.":[58],"Secondly,":[59],"do":[62],"not":[63],"regulate":[64],"distribution":[66,103],"derived":[69,106],"vectors.":[71],"This":[72,86],"`unconstrained":[73],"distribution'":[74],"may":[75],"degrade":[76],"subsequent":[81],"scoring":[82],"component,":[83],"e.g.,":[84],"PLDA.":[85],"paper":[87],"proposes":[88],"a":[89],"Gaussian-constrained":[90],"approach":[92,125],"(1)":[94],"discards":[95],"parametric":[97],"classifier,":[98],"(2)":[100],"enforces":[101],"vectors":[108],"to":[109,134],"Gaussian.":[111],"Our":[112],"experiments":[113],"VoxCeleb":[116],"SITW":[118],"databases":[119],"demonstrated":[120],"this":[122],"new":[123],"representative":[128],"regular":[130],"embeddings,":[132],"leading":[133],"consistent":[135],"improvement.":[137]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
