{"id":"https://openalex.org/W3095476325","doi":"https://doi.org/10.21437/interspeech.2020-2031","title":"Text-Independent Speaker Verification with Dual Attention Network","display_name":"Text-Independent Speaker Verification with Dual Attention Network","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3095476325","doi":"https://doi.org/10.21437/interspeech.2020-2031","mag":"3095476325"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-2031","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-2031","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101527946","display_name":"Jingyu Li","orcid":"https://orcid.org/0000-0002-1163-4557"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingyu Li","raw_affiliation_strings":["Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001795601","display_name":"Tan Lee","orcid":"https://orcid.org/0000-0002-7089-3436"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tan Lee","raw_affiliation_strings":["Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101527946"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":1.7233,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.87855391,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"956","last_page":"960"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9789000153541565,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9472000002861023,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8038893938064575},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.7241309285163879},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.6782004237174988},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5809643864631653},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4740569293498993},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3820876479148865},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3675333857536316},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11398279666900635}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8038893938064575},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.7241309285163879},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.6782004237174988},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5809643864631653},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4740569293498993},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3820876479148865},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3675333857536316},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11398279666900635},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2020-2031","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-2031","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W2046056978","https://openalex.org/W2129244720","https://openalex.org/W2191779130","https://openalex.org/W2194775991","https://openalex.org/W2219249508","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2746742816","https://openalex.org/W2748488820","https://openalex.org/W2794506738","https://openalex.org/W2799800213","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2937033898","https://openalex.org/W2939634425","https://openalex.org/W2962898354","https://openalex.org/W2962898677","https://openalex.org/W2963091558","https://openalex.org/W2963263347","https://openalex.org/W2972657845","https://openalex.org/W2972986505","https://openalex.org/W2981087920","https://openalex.org/W3103152812","https://openalex.org/W4295312788","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W4313854686","https://openalex.org/W1521299571","https://openalex.org/W2499802997","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W2150532155","https://openalex.org/W1516392727","https://openalex.org/W2140022733"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,15,53,57,96,103,108,119,124],"novel":[4],"design":[5],"of":[6,17,52,67,88,166],"attention":[7,49,59],"model":[8,13,50],"for":[9],"textindependent":[10],"speaker":[11],"verification.The":[12],"takes":[14],"pair":[16],"input":[18,32,94],"utterances":[19,33],"and":[20,56,107,123],"generates":[21],"an":[22,162],"utterance-level":[23,69],"embedding":[24,106],"to":[25,36,64],"represent":[26],"speaker-specific":[27],"characteristics":[28],"in":[29,92],"each":[30,98],"utterance.The":[31],"are":[34,43,73,83],"expected":[35],"have":[37],"highly":[38],"similar":[39],"embeddings":[40,115],"if":[41],"they":[42],"from":[44,75],"the":[45,65,68,76,80,86,89,93,114,150,156],"same":[46],"speaker.The":[47],"proposed":[48],"consists":[51],"self-attention":[54,71,104],"module":[55],"mutual":[58],"module,":[60],"which":[61],"jointly":[62],"contributes":[63],"generation":[66],"embedding.The":[70,111],"weights":[72,82],"computed":[74,84],"utterance":[77,91,99],"itself":[78],"while":[79],"mutual-attention":[81,109],"with":[85],"involvement":[87],"other":[90],"pairs.As":[95],"result,":[97],"is":[100,116,135],"represented":[101],"by":[102,118],"weighted":[105,110],"similarity":[112],"between":[113],"measured":[117],"cosine":[120],"distance":[121],"score":[122],"binary":[125],"classifier":[126],"output":[127],"score.The":[128],"whole":[129],"model,":[130],"named":[131],"Dual":[132,151],"Attention":[133,152],"Network,":[134],"trained":[136],"end-to-end":[137],"on":[138,143],"Voxceleb":[139,144],"database.The":[140],"evaluation":[141],"results":[142],"1":[145],"test":[146],"set":[147],"show":[148],"that":[149],"Network":[153],"significantly":[154],"outperforms":[155],"baseline":[157],"systems.The":[158],"best":[159],"result":[160],"yields":[161],"equal":[163],"error":[164],"rate":[165],"1.6%.":[167]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":9}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
