{"id":"https://openalex.org/W3015197287","doi":"https://doi.org/10.1109/icassp40776.2020.9053217","title":"Multi-Resolution Multi-Head Attention in Deep Speaker Embedding","display_name":"Multi-Resolution Multi-Head Attention in Deep Speaker Embedding","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015197287","doi":"https://doi.org/10.1109/icassp40776.2020.9053217","mag":"3015197287"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053217","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053217","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100372797","display_name":"Zhiming Wang","orcid":"https://orcid.org/0000-0001-8777-1729"},"institutions":[{"id":"https://openalex.org/I4210090985","display_name":"Zhejiang Financial College","ror":"https://ror.org/00deghz86","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210090985"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhiming Wang","raw_affiliation_strings":["Ant Financial Services Group, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Financial Services Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210090985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103119755","display_name":"Kaisheng Yao","orcid":"https://orcid.org/0000-0002-8949-9367"},"institutions":[{"id":"https://openalex.org/I4210090985","display_name":"Zhejiang Financial College","ror":"https://ror.org/00deghz86","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210090985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaisheng Yao","raw_affiliation_strings":["Ant Financial Services Group, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Financial Services Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210090985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100371535","display_name":"Xiaolong Li","orcid":"https://orcid.org/0000-0001-7493-2650"},"institutions":[{"id":"https://openalex.org/I4210090985","display_name":"Zhejiang Financial College","ror":"https://ror.org/00deghz86","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210090985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolong Li","raw_affiliation_strings":["Ant Financial Services Group, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Financial Services Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210090985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090616461","display_name":"Shuo Fang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090985","display_name":"Zhejiang Financial College","ror":"https://ror.org/00deghz86","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210090985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Fang","raw_affiliation_strings":["Ant Financial Services Group, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Financial Services Group, Hangzhou, China","institution_ids":["https://openalex.org/I4210090985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100372797"],"corresponding_institution_ids":["https://openalex.org/I4210090985"],"apc_list":null,"apc_paid":null,"fwci":3.8078,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.94465979,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6464","last_page":"6468"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.9083938598632812},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7852456569671631},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5532768964767456},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5178292393684387},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.5092395544052124},{"id":"https://openalex.org/keywords/head","display_name":"Head (geology)","score":0.48375773429870605},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4756826162338257},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.45855897665023804},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4544719159603119},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4427638053894043},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36287903785705566}],"concepts":[{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.9083938598632812},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7852456569671631},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5532768964767456},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5178292393684387},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.5092395544052124},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.48375773429870605},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4756826162338257},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45855897665023804},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4544719159603119},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4427638053894043},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36287903785705566},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C114793014","wikidata":"https://www.wikidata.org/wiki/Q52109","display_name":"Geomorphology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053217","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053217","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W1533861849","https://openalex.org/W1821462560","https://openalex.org/W2107042471","https://openalex.org/W2150769028","https://openalex.org/W2194775991","https://openalex.org/W2726515241","https://openalex.org/W2746742816","https://openalex.org/W2748488820","https://openalex.org/W2784163702","https://openalex.org/W2794506738","https://openalex.org/W2889519245","https://openalex.org/W2890964092","https://openalex.org/W2937328535","https://openalex.org/W2962959915","https://openalex.org/W2963371159","https://openalex.org/W2963403868","https://openalex.org/W2964247977","https://openalex.org/W2968917279","https://openalex.org/W2972369255","https://openalex.org/W2972633940","https://openalex.org/W2991951409","https://openalex.org/W2994689640","https://openalex.org/W3008230514","https://openalex.org/W3103152812","https://openalex.org/W4385245566","https://openalex.org/W6631362777","https://openalex.org/W6631943919","https://openalex.org/W6638523607","https://openalex.org/W6676180231","https://openalex.org/W6739901393","https://openalex.org/W6761649062","https://openalex.org/W6767164110","https://openalex.org/W6771538394"],"related_works":["https://openalex.org/W2953234277","https://openalex.org/W2140186469","https://openalex.org/W2626256601","https://openalex.org/W4390421286","https://openalex.org/W2900413183","https://openalex.org/W4390975304","https://openalex.org/W147410782","https://openalex.org/W4280563792","https://openalex.org/W3022252430","https://openalex.org/W4318719684"],"abstract_inverted_index":{"Pooling":[0],"is":[1],"an":[2,102],"essential":[3],"component":[4],"to":[5,21,39,75,111,161],"capture":[6],"long-term":[7],"speaker":[8,11],"characteristics":[9],"for":[10,25,43,54,98,106],"recognition.":[12],"This":[13,109],"paper":[14],"proposes":[15],"simple":[16],"but":[17],"effective":[18],"pooling":[19,56,77,99],"methods":[20],"compute":[22],"attentive":[23,55,165],"weights":[24,166],"better":[26],"temporal":[27],"aggregation":[28],"over":[29,57],"the":[30,35,58,80,125,129,133,168],"variable-length":[31],"input":[32],"speech,":[33],"enabling":[34],"end-to-end":[36],"neural":[37],"network":[38],"have":[40],"improved":[41,162],"performance":[42,72,114,135],"discriminating":[44],"among":[45],"speakers.":[46],"Particularly,":[47],"we":[48,64,92],"observe":[49],"that":[50,100,119,146],"using":[51,121,147],"multiple":[52,122,148,152],"heads":[53,149,156],"entire":[59],"encoded":[60],"sequence,":[61],"a":[62],"method":[63,131],"term":[65],"as":[66],"global":[67],"multi-head":[68,83,96],"attention,":[69],"significantly":[70],"improves":[71],"in":[73,167],"comparison":[74],"various":[76],"methods,":[78],"including":[79],"recently":[81],"proposed":[82,130],"attention":[84,90,97],"[1].":[85],"To":[86],"improve":[87],"diversity":[88],"of":[89,118,136,141,164],"heads,":[91],"further":[93],"propose":[94],"multi-resolution":[95],"has":[101],"additional":[103],"temperature":[104],"hyperparameter":[105],"each":[107],"head.":[108],"leads":[110],"even":[112],"larger":[113],"gain,":[115],"on":[116,154],"top":[117],"achieved":[120],"heads.":[123],"On":[124],"benchmark":[126],"VoxCeleb1":[127],"dataset,":[128],"achieves":[132],"state-of-the-art":[134,170],"Equal":[137],"Error":[138],"Rate":[139],"(EER)":[140],"3.966%.":[142],"Our":[143],"analysis":[144],"shows":[145],"and":[150],"having":[151],"resolutions":[153],"these":[155],"with":[157],"different":[158],"temperatures":[159],"lead":[160],"certainty":[163],"new":[169],"system.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":3}],"updated_date":"2026-05-07T06:04:25.777469","created_date":"2025-10-10T00:00:00"}
