{"id":"https://openalex.org/W3011599803","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023024","title":"Speaker-discriminative Embedding Learning via Affinity Matrix for Short Utterance Speaker Verification","display_name":"Speaker-discriminative Embedding Learning via Affinity Matrix for Short Utterance Speaker Verification","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W3011599803","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023024","mag":"3011599803"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc47483.2019.9023024","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023024","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111291634","display_name":"Junyi Peng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junyi Peng","raw_affiliation_strings":["Peking University Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Peking University Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I4210128628"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038895203","display_name":"Rongzhi Gu","orcid":"https://orcid.org/0000-0003-1861-9170"},"institutions":[{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongzhi Gu","raw_affiliation_strings":["Peking University Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Peking University Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I4210128628"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002795838","display_name":"Yuexian Zou","orcid":"https://orcid.org/0000-0001-9999-6140"},"institutions":[{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuexian Zou","raw_affiliation_strings":["Peking University Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Peking University Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I4210128628"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100676721","display_name":"Wenwu Wang","orcid":"https://orcid.org/0000-0002-8393-5703"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wenwu Wang","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, UK"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, UK","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5111291634"],"corresponding_institution_ids":["https://openalex.org/I4210128628"],"apc_list":null,"apc_paid":null,"fwci":0.14,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60574799,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"314","last_page":"319"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7744590044021606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7251617312431335},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.651578962802887},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6422008872032166},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.6305320858955383},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.608371376991272},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5789336562156677},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5497770309448242},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.548193097114563},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.47567978501319885},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4590320885181427},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4486694931983948},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42672470211982727},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.4200386703014374},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4128449857234955}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7744590044021606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7251617312431335},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.651578962802887},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6422008872032166},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.6305320858955383},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.608371376991272},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5789336562156677},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5497770309448242},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.548193097114563},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.47567978501319885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4590320885181427},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4486694931983948},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42672470211982727},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.4200386703014374},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4128449857234955},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc47483.2019.9023024","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023024","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W2046056978","https://openalex.org/W2096733369","https://openalex.org/W2150769028","https://openalex.org/W2157364932","https://openalex.org/W2193413348","https://openalex.org/W2194775991","https://openalex.org/W2584329820","https://openalex.org/W2613006358","https://openalex.org/W2726515241","https://openalex.org/W2746742816","https://openalex.org/W2747238065","https://openalex.org/W2747262669","https://openalex.org/W2748488820","https://openalex.org/W2760938034","https://openalex.org/W2889016587","https://openalex.org/W2889045432","https://openalex.org/W2890964092","https://openalex.org/W2962788625","https://openalex.org/W2963460857","https://openalex.org/W2963576203","https://openalex.org/W6631362777","https://openalex.org/W6687566353"],"related_works":["https://openalex.org/W112947718","https://openalex.org/W2206035908","https://openalex.org/W66821593","https://openalex.org/W2149220986","https://openalex.org/W1521299571","https://openalex.org/W1998549096","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W2144470400","https://openalex.org/W2911612049"],"abstract_inverted_index":{"Text-independent":[0],"short":[1],"utterance":[2,14],"speaker":[3,26,57,141,148,158],"verification":[4],"(TI-SUSV)":[5],"task":[6,16],"remains":[7],"more":[8,97,135],"challenging":[9],"compared":[10],"to":[11,18,52,68,95,114,133,162],"the":[12,39,53,63,71,127,154,157,173],"full-length":[13],"SV":[15,35],"due":[17],"inaccurately":[19],"estimated":[20],"feature":[21,86,116],"statistics":[22],"or":[23],"insufficient":[24],"distinguishable":[25],"embeddings.":[27,58,99],"It":[28],"is":[29,78,111,124,151],"noted":[30],"that":[31,177],"recently":[32],"developed":[33],"end-to-end":[34],"systems":[36],"(E2E-SV)":[37],"achieve":[38],"state-of-the-art":[40],"on":[41,80,172,187],"several":[42],"datasets,":[43],"which":[44],"directly":[45],"learn":[46],"a":[47,101,120,140,181],"mapping":[48],"from":[49],"speech":[50,85],"features":[51],"compact":[54],"fixed":[55],"length":[56],"In":[59,138],"this":[60],"study,":[61],"following":[62],"E2E-SV":[64],"pipeline,":[65],"we":[66],"strive":[67],"further":[69],"improve":[70,115],"accuracy":[72],"of":[73,156],"TI-SUSV":[74],"task.":[75],"Our":[76],"research":[77],"based":[79],"two":[81],"intuitive":[82],"ideas:":[83],"better":[84,91,164],"representation":[87,117],"for":[88],"SUs":[89],"and":[90,167,184],"training":[92],"loss":[93,123],"function":[94],"obtain":[96,134,163],"discriminative":[98],"Specifically,":[100],"bidirectional":[102],"gated":[103],"recurrent":[104],"unit":[105],"network":[106],"with":[107],"residual":[108],"connection":[109],"(Res-BGRU)":[110],"firstly":[112],"designed":[113],"capability.":[118],"Secondly,":[119],"novel":[121],"affinity":[122,143,160],"proposed":[125],"where":[126],"mini-batch":[128],"data":[129],"has":[130],"been":[131],"manipulated":[132],"supervision":[136],"information.":[137],"details,":[139],"identity":[142,149],"matrix":[144,161],"formed":[145],"by":[146],"one-hot":[147],"vectors":[150],"taken":[152],"as":[153],"supervisor":[155],"embedding":[159],"inter-speaker":[165],"separability":[166],"intra-speaker":[168],"compactness.":[169],"Experimental":[170],"results":[171],"Voxceleb1":[174],"dataset":[175],"show":[176],"our":[178],"system":[179,186],"outperforms":[180],"conventional":[182],"i-vector":[183],"x-vector":[185],"TI-SUSV.":[188]},"counts_by_year":[{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
