{"id":"https://openalex.org/W2900064592","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023352","title":"Phonetic-Attention Scoring for Deep Speaker Features in Speaker Verification","display_name":"Phonetic-Attention Scoring for Deep Speaker Features in Speaker Verification","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W2900064592","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023352","mag":"2900064592"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc47483.2019.9023352","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023352","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1811.03255","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050701255","display_name":"Lantian Li","orcid":"https://orcid.org/0000-0003-4274-7930"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lantian Li","raw_affiliation_strings":["Center for Speech and Language Technologies, Tsinghua University, Beijing, China","Center for Speech and Language Technologies, Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056437232","display_name":"Zhiyuan Tang","orcid":"https://orcid.org/0000-0002-3786-7690"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyuan Tang","raw_affiliation_strings":["Center for Speech and Language Technologies, Tsinghua University, Beijing, China","Center for Speech and Language Technologies, Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102001805","display_name":"Ying Shi","orcid":"https://orcid.org/0000-0001-8445-2300"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Shi","raw_affiliation_strings":["Center for Speech and Language Technologies, Tsinghua University, Beijing, China","Center for Speech and Language Technologies, Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100391494","display_name":"Dong Wang","orcid":"https://orcid.org/0000-0002-6992-7950"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Wang","raw_affiliation_strings":["Center for Speech and Language Technologies, Tsinghua University, Beijing, China","Center for Speech and Language Technologies, Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Center for Speech and Language Technologies, Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050701255"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.1447,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.5461959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"284","last_page":"288"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9449999928474426,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8189527988433838},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.7071505784988403},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.7049376964569092},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6715866327285767},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6053712368011475},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5939260125160217},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5579829216003418},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5359492897987366},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.49018603563308716},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.48456111550331116},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.4527955949306488},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10172712802886963}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8189527988433838},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.7071505784988403},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.7049376964569092},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6715866327285767},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6053712368011475},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5939260125160217},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5579829216003418},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5359492897987366},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.49018603563308716},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.48456111550331116},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.4527955949306488},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10172712802886963},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/apsipaasc47483.2019.9023352","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023352","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1811.03255","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1811.03255","pdf_url":"https://arxiv.org/pdf/1811.03255","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2900064592","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1811.03255","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1811.03255","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1811.03255","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1811.03255","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1811.03255","pdf_url":"https://arxiv.org/pdf/1811.03255","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.6800000071525574,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2900064592.pdf","grobid_xml":"https://content.openalex.org/works/W2900064592.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W1006777433","https://openalex.org/W1589137271","https://openalex.org/W2041823554","https://openalex.org/W2046056978","https://openalex.org/W2107638917","https://openalex.org/W2114925438","https://openalex.org/W2150769028","https://openalex.org/W2327501763","https://openalex.org/W2584329820","https://openalex.org/W2587150483","https://openalex.org/W2696967604","https://openalex.org/W2747238065","https://openalex.org/W2748488820","https://openalex.org/W2794506738","https://openalex.org/W2887833611","https://openalex.org/W2888416031","https://openalex.org/W2889519245","https://openalex.org/W2962832278","https://openalex.org/W2963565199","https://openalex.org/W2964058423","https://openalex.org/W2964247977","https://openalex.org/W2964269671","https://openalex.org/W4234330420","https://openalex.org/W6631362777","https://openalex.org/W6679434410","https://openalex.org/W6688816777","https://openalex.org/W6745983287"],"related_works":["https://openalex.org/W3012310164","https://openalex.org/W769055045","https://openalex.org/W2539205319","https://openalex.org/W2288935258","https://openalex.org/W2021486117","https://openalex.org/W2523233994","https://openalex.org/W1989284072","https://openalex.org/W2163881265","https://openalex.org/W3015648588","https://openalex.org/W3134722711","https://openalex.org/W1990548145","https://openalex.org/W2804999422","https://openalex.org/W3113076898","https://openalex.org/W1484211110","https://openalex.org/W2081566031","https://openalex.org/W3153794713","https://openalex.org/W116024930","https://openalex.org/W2070411707","https://openalex.org/W2952021518","https://openalex.org/W1545418955"],"abstract_inverted_index":{"Recent":[0],"studies":[1],"have":[2],"shown":[3],"that":[4,199],"frame-level":[5,33],"deep":[6,14],"speaker":[7,47],"features":[8],"can":[9,39,111,210],"be":[10,40],"derived":[11,41],"from":[12,70],"a":[13,26,88,126,187],"neural":[15],"network":[16],"with":[17,119,180,192,201],"the":[18,32,45,60,64,74,78,85,100,103,107,113,148,151,162,169,177,202],"training":[19],"target":[20],"set":[21],"to":[22,59,84,116,160],"discriminate":[23],"speakers":[24],"by":[25,93],"short":[27],"speech":[28],"segment.":[29],"By":[30,133],"pooling":[31],"features,":[34],"utterance-level":[35],"representations,":[36],"called":[37],"d-vectors,":[38],"and":[42,106,157,221],"used":[43,159],"in":[44,154,168,215],"automatic":[46],"verification":[48],"(ASV)":[49],"task.":[50],"This":[51,97,123,145,172],"simple":[52],"average":[53,204],"pooling,":[54,205],"however,":[55],"is":[56,73,91,139,158],"inherently":[57],"sensitive":[58],"phonetic":[61,121,155,182,194],"content":[62],"of":[63,80,102,150,164,218],"utterance.":[65],"An":[66],"interesting":[67],"idea":[68,115],"borrowed":[69],"machine":[71],"translation":[72,86],"attention-based":[75],"mechanism,":[76],"where":[77],"contribution":[79,163],"an":[81,94,136],"input":[82,104],"word":[83,105],"at":[87],"particular":[89],"time":[90],"weighted":[92],"attention":[95,137],"score.":[96],"score":[98,138,146],"reflects":[99,147],"relevance":[101],"present":[108],"translation.":[109],"We":[110],"use":[112],"same":[114],"align":[117],"utterances":[118,191],"different":[120],"contents.":[122,195],"paper":[124],"proposes":[125],"phonetic-attention":[127,207],"scoring":[128,174,208],"approach":[129,175,209],"for":[130,141,190],"d-vector":[131],"systems.":[132],"this":[134,165,206],"approach,":[135],"computed":[140],"each":[142],"frame":[143,166,178],"pair.":[144],"similarity":[149],"two":[152],"frames":[153],"content,":[156],"weigh":[161],"pair":[167],"utterance-based":[170],"scoring.":[171],"new":[173],"emphasizes":[176],"pairs":[179],"similar":[181],"contents,":[183],"which":[184],"essentially":[185],"provides":[186],"soft":[188],"alignment":[189],"any":[193],"Experimental":[196],"results":[197],"show":[198],"compared":[200],"naive":[203],"deliver":[211],"consistent":[212],"performance":[213],"improvement":[214],"ASV":[216],"tasks":[217],"both":[219],"text-dependent":[220],"text-independent.":[222]},"counts_by_year":[{"year":2019,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
