{"id":"https://openalex.org/W4313387459","doi":"https://doi.org/10.1109/taslp.2022.3221042","title":"Generalization Ability Improvement of Speaker Representation and Anti-Interference for Speaker Verification","display_name":"Generalization Ability Improvement of Speaker Representation and Anti-Interference for Speaker Verification","publication_year":2022,"publication_date":"2022-11-17","ids":{"openalex":"https://openalex.org/W4313387459","doi":"https://doi.org/10.1109/taslp.2022.3221042"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2022.3221042","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3221042","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054849851","display_name":"Qian-Bei Hong","orcid":"https://orcid.org/0000-0003-2123-1601"},"institutions":[{"id":"https://openalex.org/I84653119","display_name":"Academia Sinica","ror":"https://ror.org/05bxb3784","country_code":"TW","type":"facility","lineage":["https://openalex.org/I84653119"]},{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Qian-Bei Hong","raw_affiliation_strings":["Graduate Program of Multimedia Systems and Intelligent Computing, National Cheng Kung University and Academia Sinica, Tainan, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-2123-1601","affiliations":[{"raw_affiliation_string":"Graduate Program of Multimedia Systems and Intelligent Computing, National Cheng Kung University and Academia Sinica, Tainan, Taiwan","institution_ids":["https://openalex.org/I91807558","https://openalex.org/I84653119"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103251327","display_name":"Chung\u2010Hsien Wu","orcid":"https://orcid.org/0000-0002-3947-2123"},"institutions":[{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chung-Hsien Wu","raw_affiliation_strings":["Department of Computer Science and Information Engineering, National Cheng Kung University, Tainan, Taiwan"],"raw_orcid":"https://orcid.org/0000-0002-3947-2123","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Cheng Kung University, Tainan, Taiwan","institution_ids":["https://openalex.org/I91807558"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071214181","display_name":"Hsin\u2010Min Wang","orcid":"https://orcid.org/0000-0003-3599-5071"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsin-Min Wang","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-3599-5071","affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2775,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.6475511,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"31","issue":null,"first_page":"486","last_page":"499"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.7890588045120239},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.7670804262161255},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6796913146972656},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6628105640411377},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6513431668281555},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5795656442642212},{"id":"https://openalex.org/keywords/interference","display_name":"Interference (communication)","score":0.5607408285140991},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5313606262207031},{"id":"https://openalex.org/keywords/norm","display_name":"Norm (philosophy)","score":0.4801573157310486},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.4294856786727905},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.42617231607437134},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36840569972991943},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.327384352684021},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1824071705341339},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.09433656930923462}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.7890588045120239},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.7670804262161255},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6796913146972656},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6628105640411377},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6513431668281555},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5795656442642212},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.5607408285140991},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5313606262207031},{"id":"https://openalex.org/C191795146","wikidata":"https://www.wikidata.org/wiki/Q3878446","display_name":"Norm (philosophy)","level":2,"score":0.4801573157310486},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.4294856786727905},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.42617231607437134},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36840569972991943},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.327384352684021},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1824071705341339},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.09433656930923462},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2022.3221042","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3221042","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G4169684317","display_name":null,"funder_award_id":"MOST 108-2221-E-006-103-MY3","funder_id":"https://openalex.org/F4320322795","funder_display_name":"Ministry of Science and Technology, Taiwan"}],"funders":[{"id":"https://openalex.org/F4320322795","display_name":"Ministry of Science and Technology, Taiwan","ror":"https://ror.org/02kv4zf79"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W67277430","https://openalex.org/W125553504","https://openalex.org/W1494198834","https://openalex.org/W1524333225","https://openalex.org/W2046056978","https://openalex.org/W2101556109","https://openalex.org/W2117671523","https://openalex.org/W2121812409","https://openalex.org/W2150769028","https://openalex.org/W2194775991","https://openalex.org/W2219249508","https://openalex.org/W2290689761","https://openalex.org/W2395750323","https://openalex.org/W2516764878","https://openalex.org/W2651834199","https://openalex.org/W2696967604","https://openalex.org/W2718701456","https://openalex.org/W2726515241","https://openalex.org/W2734811608","https://openalex.org/W2746742816","https://openalex.org/W2747165665","https://openalex.org/W2748466034","https://openalex.org/W2748488820","https://openalex.org/W2752782242","https://openalex.org/W2753709519","https://openalex.org/W2783693219","https://openalex.org/W2784163702","https://openalex.org/W2808631503","https://openalex.org/W2809661872","https://openalex.org/W2887793820","https://openalex.org/W2888897023","https://openalex.org/W2889519245","https://openalex.org/W2890964092","https://openalex.org/W2913340405","https://openalex.org/W2916301830","https://openalex.org/W2928165649","https://openalex.org/W2938358845","https://openalex.org/W2949811029","https://openalex.org/W2956925952","https://openalex.org/W2962788625","https://openalex.org/W2963466847","https://openalex.org/W2963470929","https://openalex.org/W2963902346","https://openalex.org/W2964054038","https://openalex.org/W2981087920","https://openalex.org/W3015253990","https://openalex.org/W3015368919","https://openalex.org/W3015598461","https://openalex.org/W3024869864","https://openalex.org/W3096235116","https://openalex.org/W3097244734","https://openalex.org/W3099631870","https://openalex.org/W3103152812","https://openalex.org/W3141797743","https://openalex.org/W3142516134","https://openalex.org/W3163505255","https://openalex.org/W3198259309","https://openalex.org/W4200515686","https://openalex.org/W4221154746","https://openalex.org/W4283031701","https://openalex.org/W4286611450","https://openalex.org/W4286981691","https://openalex.org/W4288091954","https://openalex.org/W4296143113","https://openalex.org/W6631362777","https://openalex.org/W6688816777","https://openalex.org/W6769178842","https://openalex.org/W6784400926","https://openalex.org/W6804998948"],"related_works":["https://openalex.org/W1968216131","https://openalex.org/W2355011896","https://openalex.org/W3089964815","https://openalex.org/W2206035908","https://openalex.org/W1581134722","https://openalex.org/W66821593","https://openalex.org/W2149220986","https://openalex.org/W1521299571","https://openalex.org/W2144470400","https://openalex.org/W2911612049"],"abstract_inverted_index":{"The":[0],"ability":[1,37,82],"to":[2,4,33,38,51,87,102,183],"generalize":[3],"mismatches":[5],"between":[6],"training":[7],"and":[8,11,45,50,128,144,161,175,200],"testing":[9],"conditions":[10,49,197],"resist":[12],"interference":[13,56,107,229],"from":[14,57,108,230],"other":[15,58,109,135,231],"speakers":[16,59,110,160],"is":[17,73,100,131,218,235,242],"crucial":[18],"for":[19,75],"the":[20,35,41,53,61,80,84,89,104,116,118,125,129,148,166,171,184,190,202,208,212,216,223,226,233,246],"performance":[21,130],"of":[22,55,64,83,91,106,158,225,228,245],"speaker":[23,66,92,119,193],"verification.":[24],"In":[25,115,147,211],"this":[26],"paper,":[27],"we":[28],"propose":[29],"two":[30,65,179],"novel":[31],"approaches":[32],"improve":[34,88],"generalization":[36,81],"deal":[39],"with":[40],"mismatched":[42,214],"recorded":[43],"scenarios":[44],"languages":[46],"in":[47,177,189,195],"test":[48],"reduce":[52,103],"influence":[54,105,227],"on":[60,111,133,150,198],"similarity":[62,113],"measurement":[63],"embeddings.":[67,93],"First,":[68],"parent":[69],"embedding":[70,120],"learning":[71],"(PEL)":[72],"used":[74,101,243],"model":[76],"training,":[77],"which":[78],"exploits":[79],"shared":[85],"structure":[86],"representation":[90],"Second,":[94],"partial":[95],"adaptive":[96],"score":[97,249],"normalization":[98,250],"(PAS-Norm)":[99],"embedding-based":[112],"measures.":[114],"experiments,":[117],"models":[121],"are":[122],"trained":[123],"using":[124],"VoxCeleb2":[126],"dataset,":[127],"evaluated":[132],"four":[134],"datasets":[136],"under":[137],"different":[138],"conditions,":[139,215],"including":[140],"VoxCeleb1,":[141,151],"Librispeech,":[142],"SITW,":[143],"CN-Celeb":[145],"datasets.":[146],"experiments":[149,191],"evaluation":[152,224],"results":[153],"considering":[154],"a":[155],"large":[156],"number":[157],"verification":[159,194],"identity":[162],"restrictions":[163],"show":[164],"that":[165],"proposed":[167,203],"PEL-based":[168,204],"system":[169,205],"reduces":[170],"EER":[172,217,234],"by":[173,220,238],"6.0%":[174],"4.9%":[176],"these":[178],"cases,":[180],"respectively,":[181],"compared":[182],"state-of-the-art":[185],"(SOTA)":[186],"system.":[187,210],"Furthermore,":[188],"evaluating":[192],"mismatch":[196],"SITW":[199],"CN-Celeb,":[201],"also":[206],"outperforms":[207],"SOTA":[209],"language":[213],"reduced":[219,237],"8.3%.":[221],"For":[222],"speakers,":[232],"significantly":[236],"24.4%":[239],"when":[240],"PAS-Norm":[241],"instead":[244],"baseline":[247],"AS-Norm":[248],"method.":[251]},"counts_by_year":[{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
