{"id":"https://openalex.org/W2584329820","doi":"https://doi.org/10.1109/slt.2016.7846261","title":"End-to-End attention based text-dependent speaker verification","display_name":"End-to-End attention based text-dependent speaker verification","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2584329820","doi":"https://doi.org/10.1109/slt.2016.7846261","mag":"2584329820"},"language":"en","primary_location":{"id":"doi:10.1109/slt.2016.7846261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2016.7846261","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101785327","display_name":"Shi-Xiong Zhang","orcid":"https://orcid.org/0009-0007-7803-2407"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shi-Xiong Zhang","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100345092","display_name":"Zhuo Chen","orcid":"https://orcid.org/0000-0002-9011-7928"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhuo Chen","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100702071","display_name":"Yong Zhao","orcid":"https://orcid.org/0000-0003-2644-952X"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yong Zhao","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077401426","display_name":"Yifan Gong","orcid":"https://orcid.org/0000-0001-8786-3391"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yifan Gong","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101785327"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":35.5636,"has_fulltext":false,"cited_by_count":166,"citation_normalized_percentile":{"value":0.99710242,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"171","last_page":"178"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.723441481590271},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.6041927337646484},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5687119960784912},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5061168074607849},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37313807010650635},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3334192931652069},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2704324424266815}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.723441481590271},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.6041927337646484},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5687119960784912},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5061168074607849},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37313807010650635},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3334192931652069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2704324424266815}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt.2016.7846261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2016.7846261","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.6800000071525574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W182365161","https://openalex.org/W1530239534","https://openalex.org/W1686810756","https://openalex.org/W1836465849","https://openalex.org/W1860566644","https://openalex.org/W1872348600","https://openalex.org/W1895577753","https://openalex.org/W1909308924","https://openalex.org/W1993216012","https://openalex.org/W1995562189","https://openalex.org/W1996512145","https://openalex.org/W2012897754","https://openalex.org/W2034940213","https://openalex.org/W2039057510","https://openalex.org/W2046056978","https://openalex.org/W2057038408","https://openalex.org/W2061013432","https://openalex.org/W2064364374","https://openalex.org/W2069883713","https://openalex.org/W2078169166","https://openalex.org/W2107638917","https://openalex.org/W2114925438","https://openalex.org/W2121890799","https://openalex.org/W2129244720","https://openalex.org/W2147768505","https://openalex.org/W2148488622","https://openalex.org/W2150769028","https://openalex.org/W2168900466","https://openalex.org/W2384495648","https://openalex.org/W2397634864","https://openalex.org/W2404292690","https://openalex.org/W2407374891","https://openalex.org/W2484386994","https://openalex.org/W2505121225","https://openalex.org/W2916986993","https://openalex.org/W2949117887","https://openalex.org/W2962835968","https://openalex.org/W2962853205","https://openalex.org/W2963174142","https://openalex.org/W2963490782","https://openalex.org/W6607467483","https://openalex.org/W6631680487","https://openalex.org/W6637373629","https://openalex.org/W6638667902","https://openalex.org/W6639261775","https://openalex.org/W6639916541","https://openalex.org/W6712325649"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W66821593","https://openalex.org/W4297807400","https://openalex.org/W2249138175","https://openalex.org/W1521299571","https://openalex.org/W4313854686","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W1516392727","https://openalex.org/W2140022733"],"abstract_inverted_index":{"A":[0],"new":[1],"type":[2],"of":[3,164],"End-to-End":[4],"system":[5,168],"for":[6,26,152],"text-dependent":[7],"speaker":[8,27,50,61,79,91,136,155,174],"verification":[9,28,141,175],"is":[10,111],"presented":[11],"in":[12],"this":[13,56],"paper.":[14],"Previously,":[15],"using":[16,114],"the":[17,66,90,95,100,106,123,148,158,162,165],"phonetic":[18,96],"discriminate/speaker":[19],"discriminate":[20,62,92],"DNN":[21],"as":[22],"a":[23,60,129,133,139],"feature":[24],"extractor":[25],"has":[29],"shown":[30],"promising":[31],"results.":[32],"The":[33,85,102,118,143],"extracted":[34],"frame-level":[35,68],"(bottleneck,":[36],"posterior":[37],"or":[38,53],"d-vector)":[39],"features":[40,71],"are":[41,72],"equally":[42],"weighted":[43],"and":[44,94,108,132],"aggregated":[45],"to":[46,64,75,98,156],"compute":[47],"an":[48,77,82,115],"utterance-level":[49,78],"representation":[51],"(d-vector":[52],"i-vector).":[54],"In":[55],"work":[57],"we":[58],"use":[59],"CNN":[63,107],"extract":[65],"noise-robust":[67],"features.":[69],"These":[70],"smartly":[73,146],"combined":[74],"form":[76],"vector":[80],"through":[81],"attention":[83,87,109],"mechanism.":[84],"proposed":[86,166],"model":[88],"takes":[89],"information":[93,97],"learn":[99],"weights.":[101],"whole":[103],"system,":[104],"including":[105],"model,":[110],"joint":[112],"optimized":[113],"end-to-end":[116,167],"criterion.":[117],"training":[119],"algorithm":[120,144],"imitates":[121],"exactly":[122],"evaluation":[124],"process":[125],"\u2014":[126],"directly":[127],"mapping":[128],"test":[130],"utterance":[131],"few":[134],"target":[135,154],"utterances":[137],"into":[138],"single":[140],"score.":[142],"can":[145],"select":[147],"most":[149],"similar":[150],"impostor":[151],"each":[153],"train":[157],"network.":[159],"We":[160],"demonstrated":[161],"effectiveness":[163],"on":[169],"Windows":[170],"10":[171],"\u201cHey":[172],"Cortana\u201d":[173],"task.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":41},{"year":2019,"cited_by_count":50},{"year":2018,"cited_by_count":23},{"year":2017,"cited_by_count":10}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
