{"id":"https://openalex.org/W3206114047","doi":"https://doi.org/10.1109/icassp43922.2022.9746639","title":"Multi-View Self-Attention Based Transformer for Speaker Recognition","display_name":"Multi-View Self-Attention Based Transformer for Speaker Recognition","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3206114047","doi":"https://doi.org/10.1109/icassp43922.2022.9746639","mag":"3206114047"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746639","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746639","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100431267","display_name":"Rui Wang","orcid":"https://orcid.org/0000-0002-5211-2114"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rui Wang","raw_affiliation_strings":["Tongji University,Department of Computer Science and Technology","Department of Computer Science and Technology, Tongji University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tongji University,Department of Computer Science and Technology","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077069016","display_name":"Junyi Ao","orcid":"https://orcid.org/0000-0001-8979-0835"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]},{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Junyi Ao","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering","Department of Computer Science and Engineering, Southern University of Science and Technology","Department of Computing, The Hong Kong Polytechnic University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106557565","display_name":"Long Zhou","orcid":"https://orcid.org/0009-0006-1919-4943"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Zhou","raw_affiliation_strings":["Microsoft Research Asia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101635405","display_name":"Shujie Liu","orcid":"https://orcid.org/0009-0008-0785-8882"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shujie Liu","raw_affiliation_strings":["Microsoft Research Asia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004301353","display_name":"Zhihua Wei","orcid":"https://orcid.org/0000-0002-5937-3907"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihua Wei","raw_affiliation_strings":["Tongji University,Department of Computer Science and Technology","Department of Computer Science and Technology, Tongji University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tongji University,Department of Computer Science and Technology","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038062913","display_name":"Tom Ko","orcid":"https://orcid.org/0000-0002-5324-8961"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tom Ko","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering","Department of Computer Science and Engineering, Southern University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100404176","display_name":"Qing Li","orcid":"https://orcid.org/0000-0003-3370-471X"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qing Li","raw_affiliation_strings":["The Hong Kong Polytechnic University,Department of Computing","Department of Computing, The Hong Kong Polytechnic University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University,Department of Computing","institution_ids":["https://openalex.org/I14243506"]},{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100433691","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0003-1100-4835"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering","Department of Computer Science and Engineering, Southern University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology","institution_ids":["https://openalex.org/I3045169105"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100431267"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":3.4295,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.93814925,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6732","last_page":"6736"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7989081144332886},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.74289470911026},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5588476061820984},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.5558242201805115},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5163291692733765},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.49614009261131287},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4380815029144287},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3252808451652527},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09123441576957703}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7989081144332886},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.74289470911026},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5588476061820984},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.5558242201805115},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5163291692733765},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.49614009261131287},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4380815029144287},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3252808451652527},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09123441576957703},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746639","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746639","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7400000095367432}],"awards":[],"funders":[{"id":"https://openalex.org/F4320330944","display_name":"Nature","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W2407080277","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2794506738","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2903739847","https://openalex.org/W2916104401","https://openalex.org/W2933138175","https://openalex.org/W2936774411","https://openalex.org/W2963371159","https://openalex.org/W2963403868","https://openalex.org/W2972657845","https://openalex.org/W3015197287","https://openalex.org/W3024869864","https://openalex.org/W3025075133","https://openalex.org/W3034999214","https://openalex.org/W3048784519","https://openalex.org/W3094502228","https://openalex.org/W3094929291","https://openalex.org/W3095851463","https://openalex.org/W3095992690","https://openalex.org/W3096567388","https://openalex.org/W3096918678","https://openalex.org/W3099782249","https://openalex.org/W3161873870","https://openalex.org/W3162227138","https://openalex.org/W3169320628","https://openalex.org/W3177892206","https://openalex.org/W3178647810","https://openalex.org/W3197343310","https://openalex.org/W3205644108","https://openalex.org/W3209059054","https://openalex.org/W3211278025","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6784333009","https://openalex.org/W6784809647","https://openalex.org/W6797726355","https://openalex.org/W6802465204","https://openalex.org/W6804209468"],"related_works":["https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W2249138175","https://openalex.org/W4313854686","https://openalex.org/W3162054169","https://openalex.org/W1813780412","https://openalex.org/W289407349","https://openalex.org/W2368768466","https://openalex.org/W2757081366","https://openalex.org/W3197877226"],"abstract_inverted_index":{"Initially":[0],"developed":[1],"for":[2,13,35,52,86,108],"natural":[3],"language":[4],"processing":[5,15],"(NLP),":[6],"Transformer":[7,50,77,131,173],"model":[8],"is":[9],"now":[10],"widely":[11],"used":[12],"speech":[14,44],"tasks":[16],"such":[17],"as":[18],"speaker":[19,46,53,87,109,144,167,172],"recognition,":[20,168],"due":[21],"to":[22,90,118,142],"its":[23],"powerful":[24],"sequence":[25,38],"modeling":[26,36,99],"capabilities.":[27],"However,":[28],"conventional":[29],"self-attention":[30,68,106,159],"mechanisms":[31],"are":[32],"originally":[33],"designed":[34],"textual":[37],"without":[39,81],"considering":[40],"the":[41,82,92,100,122,149,156,164,170],"characteristics":[42],"of":[43,75,94,121,166],"and":[45,70,98,128,139,151,169],"modeling.":[47],"Besides,":[48],"different":[49,76,113,119,134],"variants":[51,78,132],"recognition":[54],"have":[55],"not":[56],"been":[57],"well":[58],"studied.":[59],"In":[60],"this":[61],"work,":[62],"we":[63,102,126],"propose":[64,103],"a":[65,104],"novel":[66],"multi-view":[67,105,158],"mechanism":[69,85,107,160],"present":[71],"an":[72],"empirical":[73],"study":[74],"with":[79,133,179],"or":[80],"proposed":[83,157,171],"attention":[84,114],"recognition.":[88],"Specifically,":[89],"balance":[91],"capabilities":[93],"capturing":[95],"global":[96],"dependencies":[97],"locality,":[101],"Transformer,":[110],"in":[111,163],"which":[112],"heads":[115],"can":[116],"attend":[117],"ranges":[120],"receptive":[123],"field.":[124],"Furthermore,":[125],"introduce":[127],"compare":[129],"five":[130],"network":[135,174],"architectures,":[136],"embedding":[137],"locations,":[138],"pooling":[140],"methods":[141],"learn":[143],"embeddings.":[145],"Experimental":[146],"results":[147,177],"on":[148],"VoxCeleb1":[150],"VoxCeleb2":[152],"datasets":[153],"show":[154],"that":[155],"achieves":[161],"improvement":[162],"performance":[165],"attains":[175],"excellent":[176],"compared":[178],"state-of-the-art":[180],"models.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":4}],"updated_date":"2026-05-11T08:15:01.531666","created_date":"2025-10-10T00:00:00"}
