{"id":"https://openalex.org/W4406858676","doi":"https://doi.org/10.1109/apsipaasc63619.2025.10849338","title":"Joseph: phonetic-aware speaker embedding for far-field speaker verification","display_name":"Joseph: phonetic-aware speaker embedding for far-field speaker verification","publication_year":2024,"publication_date":"2024-12-03","ids":{"openalex":"https://openalex.org/W4406858676","doi":"https://doi.org/10.1109/apsipaasc63619.2025.10849338"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc63619.2025.10849338","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc63619.2025.10849338","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111193687","display_name":"Zezhong Jin","orcid":"https://orcid.org/0009-0001-0828-2933"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Zezhong Jin","raw_affiliation_strings":["The Hong Kong Polytechnic University,Department of Electrical and Electronic Engineering,Hong Kong SAR"],"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University,Department of Electrical and Electronic Engineering,Hong Kong SAR","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019467579","display_name":"Youzhi Tu","orcid":"https://orcid.org/0000-0002-9580-2414"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Youzhi Tu","raw_affiliation_strings":["The Hong Kong Polytechnic University,Department of Electrical and Electronic Engineering,Hong Kong SAR"],"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University,Department of Electrical and Electronic Engineering,Hong Kong SAR","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068768998","display_name":"Man\u2010Wai Mak","orcid":"https://orcid.org/0000-0001-8854-3760"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Man-Wai Mak","raw_affiliation_strings":["The Hong Kong Polytechnic University,Department of Electrical and Electronic Engineering,Hong Kong SAR"],"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University,Department of Electrical and Electronic Engineering,Hong Kong SAR","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5111193687"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.78429026,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.96670001745224,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7039307951927185},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.6537579298019409},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5783653855323792},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5574336051940918},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5306593179702759},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5281819105148315},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.49393516778945923},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3587000370025635},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29056745767593384},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.060325682163238525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7039307951927185},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.6537579298019409},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5783653855323792},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5574336051940918},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5306593179702759},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5281819105148315},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.49393516778945923},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3587000370025635},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29056745767593384},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.060325682163238525},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc63619.2025.10849338","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc63619.2025.10849338","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2041823554","https://openalex.org/W2053150333","https://openalex.org/W2127141656","https://openalex.org/W2136682440","https://openalex.org/W2150769028","https://openalex.org/W2173629880","https://openalex.org/W2726515241","https://openalex.org/W2747165665","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2964247977","https://openalex.org/W2972609576","https://openalex.org/W2972743990","https://openalex.org/W2978904488","https://openalex.org/W3010925296","https://openalex.org/W3024869864","https://openalex.org/W3097459123","https://openalex.org/W3135733472","https://openalex.org/W4214503194","https://openalex.org/W4225276605","https://openalex.org/W4280569506","https://openalex.org/W4308091070","https://openalex.org/W6631362777","https://openalex.org/W6688816777","https://openalex.org/W6769178842","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W4389984014","https://openalex.org/W2144208207","https://openalex.org/W1509309911","https://openalex.org/W1599425004","https://openalex.org/W2118860825","https://openalex.org/W2096510939","https://openalex.org/W2144470400","https://openalex.org/W1516392727","https://openalex.org/W2140022733","https://openalex.org/W2911612049"],"abstract_inverted_index":{"Performing":[0],"speaker":[1,29,66,80,98,121,149],"verification":[2],"(SV)":[3],"at":[4],"a":[5,23,47,79],"distance":[6],"from":[7],"the":[8,15,34,65,74,105,115,119,124,131],"sound":[9],"source":[10],"is":[11,88,144],"challenging":[12],"because":[13],"of":[14,17,37,78],"interference":[16],"noise":[18,38,109],"and":[19,39,51,100,110,130],"reverberation.":[20,40,111],"In":[21],"such":[22],"situation,":[24],"incorporating":[25],"phonetic":[26,57,70,90,139],"information":[27,71,91,99,140],"into":[28],"embeddings":[30,67],"can":[31,92],"help":[32],"reduce":[33],"adverse":[35],"effects":[36],"Inspired":[41],"by":[42,72],"this":[43],"observation,":[44],"we":[45],"propose":[46],"Jointly":[48],"optimized":[49],"speaker-embedding":[50],"phonetic-matching":[52],"(Joseph)":[53],"framework":[54,63,117],"to":[55,68,108],"exploit":[56],"content":[58],"for":[59,104,146],"far-field":[60,142],"SV.":[61],"The":[62,86],"encourages":[64],"preserve":[69,93],"matching":[73],"frame-based":[75],"feature":[76],"maps":[77],"embedding":[81,122],"network":[82],"with":[83,97],"wav2vec\u2019s":[84],"vectors.":[85],"intuition":[87],"that":[89,114,137],"low-level":[94],"acoustic":[95],"dynamics":[96],"thus":[101],"partly":[102],"compensate":[103],"degradation":[106],"due":[107],"Results":[112],"show":[113],"proposed":[116],"outperforms":[118],"standard":[120],"on":[123],"VOiCES":[125],"Challenge":[126],"2019":[127],"evaluation":[128],"set":[129],"VoxCeleb1":[132],"test":[133],"set.":[134],"This":[135],"indicates":[136],"leveraging":[138],"under":[141],"conditions":[143],"effective":[145],"learning":[147],"robust":[148],"representations.":[150]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
