{"id":"https://openalex.org/W4304091753","doi":"https://doi.org/10.1145/3503161.3548240","title":"Phoneme-Aware Adaptation with Discrepancy Minimization and Dynamically-Classified Vector for Text-independent Speaker Verification","display_name":"Phoneme-Aware Adaptation with Discrepancy Minimization and Dynamically-Classified Vector for Text-independent Speaker Verification","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4304091753","doi":"https://doi.org/10.1145/3503161.3548240"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3548240","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548240","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025137072","display_name":"Jia Wang","orcid":"https://orcid.org/0000-0003-2308-2259"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jia Wang","raw_affiliation_strings":["Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065123431","display_name":"Tianhao Lan","orcid":"https://orcid.org/0009-0001-9459-4713"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianhao Lan","raw_affiliation_strings":["Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047093288","display_name":"Jie Chen","orcid":"https://orcid.org/0000-0002-9811-1694"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Chen","raw_affiliation_strings":["Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060564734","display_name":"Chengwen Luo","orcid":"https://orcid.org/0000-0003-0293-0781"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengwen Luo","raw_affiliation_strings":["Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101533570","display_name":"Chao Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Wu","raw_affiliation_strings":["Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100393871","display_name":"Jianqiang Li","orcid":"https://orcid.org/0000-0002-2208-962X"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianqiang Li","raw_affiliation_strings":["Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025137072"],"corresponding_institution_ids":["https://openalex.org/I180726961"],"apc_list":null,"apc_paid":null,"fwci":0.2079,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.41962939,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"6737","last_page":"6745"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.9206106662750244},{"id":"https://openalex.org/keywords/subnet","display_name":"Subnet","score":0.8309484720230103},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7983895540237427},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.707831621170044},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5994088649749756},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5482398867607117},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.49638205766677856},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.481986939907074},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4713903069496155},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4634108245372772},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.4479243755340576},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.44756877422332764},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.437186062335968},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4330926537513733},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.42629197239875793},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2466931939125061}],"concepts":[{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.9206106662750244},{"id":"https://openalex.org/C21099817","wikidata":"https://www.wikidata.org/wiki/Q7631721","display_name":"Subnet","level":2,"score":0.8309484720230103},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7983895540237427},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.707831621170044},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5994088649749756},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5482398867607117},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.49638205766677856},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.481986939907074},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4713903069496155},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4634108245372772},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.4479243755340576},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.44756877422332764},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.437186062335968},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4330926537513733},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.42629197239875793},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2466931939125061},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3503161.3548240","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548240","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1589137271","https://openalex.org/W2039057510","https://openalex.org/W2046056978","https://openalex.org/W2078528584","https://openalex.org/W2114925438","https://openalex.org/W2150769028","https://openalex.org/W2160815625","https://openalex.org/W2194775991","https://openalex.org/W2402146185","https://openalex.org/W2406778302","https://openalex.org/W2566925314","https://openalex.org/W2696967604","https://openalex.org/W2747238065","https://openalex.org/W2748488820","https://openalex.org/W2784163702","https://openalex.org/W2888968865","https://openalex.org/W2890964092","https://openalex.org/W2938358845","https://openalex.org/W2963351448","https://openalex.org/W2963466847","https://openalex.org/W2963516811","https://openalex.org/W2969985801","https://openalex.org/W2972743990","https://openalex.org/W2998469040","https://openalex.org/W3015648588","https://openalex.org/W3015964387","https://openalex.org/W3024758121","https://openalex.org/W3035693354","https://openalex.org/W3103152812","https://openalex.org/W3160329778","https://openalex.org/W3163421828","https://openalex.org/W6631362777","https://openalex.org/W6688325169"],"related_works":["https://openalex.org/W3095152779","https://openalex.org/W3119773509","https://openalex.org/W3128220219","https://openalex.org/W3148366653","https://openalex.org/W2162582511","https://openalex.org/W4255044973","https://openalex.org/W2499802997","https://openalex.org/W2112059504","https://openalex.org/W2727729836","https://openalex.org/W1960256358"],"abstract_inverted_index":{"Recent":[0],"studies":[1],"show":[2],"that":[3,65],"introducing":[4],"phonetic":[5],"information":[6],"into":[7],"multi-task":[8],"learning":[9],"could":[10,66,137],"significantly":[11],"improve":[12],"the":[13,28,47,99,107,113,119,123,150,164],"performance":[14],"of":[15,21,30,49,56,106,118],"speaker":[16,86,120,152],"embedding":[17],"extraction.":[18],"However,":[19],"benefits":[20],"such":[22],"architectures":[23],"usually":[24],"depend":[25],"largely":[26],"on":[27,156],"availibility":[29],"a":[31,74,130],"well-matched":[32],"dataset,":[33],"and":[34,54,81,122,143],"domain":[35],"or":[36],"language":[37],"mismatch":[38,114],"would":[39],"result":[40],"in":[41,44,175],"obvious":[42],"dropdown":[43],"performance.":[45],"Meanwhile,":[46],"utilization":[48],"these":[50,57,90],"massive":[51],"mismatched":[52],"data":[53,117,158],"application":[55],"auxiliary":[58],"tasks":[59],"may":[60],"bring":[61],"many":[62],"rich":[63],"features":[64,142],"be":[67],"exploited.":[68],"In":[69],"this":[70],"paper,":[71],"we":[72,128],"propose":[73],"phoneme-aware":[75],"adaptation":[76,168],"network":[77],"with":[78],"discrepancy":[79,102],"minimization":[80],"dynamically-classified":[82,131],"vector":[83],"for":[84],"text-independent":[85],"verification":[87],"to":[88,111,148],"address":[89],"abovementioned":[91],"challenges.":[92],"More":[93],"specifically,":[94],"our":[95],"method":[96],"first":[97],"utilize":[98],"maximum":[100],"mean":[101],"(MMD)":[103],"as":[104],"part":[105],"total":[108],"loss":[109,134],"function":[110],"solve":[112],"between":[115],"training":[116],"subnet":[121],"phoneme":[124,167],"subnet.":[125],"And":[126],"then":[127],"use":[129],"vector-guided":[132],"softmax":[133],"(DV-Softmax),":[135],"which":[136],"adaptively":[138],"emphasize":[139],"different":[140],"high-quality":[141],"dynamically":[144],"change":[145],"their":[146],"weights,":[147],"guide":[149],"discriminative":[151],"embedding.":[153],"Experimental":[154],"results":[155],"VoxCeleb1":[157],"set":[159],"confirmed":[160],"its":[161],"superiority":[162],"against":[163],"other":[165],"state-of-the-art":[166],"methods,":[169],"providing":[170],"approximately":[171],"15%":[172],"relative":[173],"improvements":[174],"equal":[176],"error":[177],"rate":[178],"(EER).":[179]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
