{"id":"https://openalex.org/W3010925296","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023039","title":"Margin Matters: Towards More Discriminative Deep Neural Network Embeddings for Speaker Recognition","display_name":"Margin Matters: Towards More Discriminative Deep Neural Network Embeddings for Speaker Recognition","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W3010925296","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023039","mag":"3010925296"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc47483.2019.9023039","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023039","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055220405","display_name":"Xu Xiang","orcid":"https://orcid.org/0000-0003-2530-9023"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xu Xiang","raw_affiliation_strings":["MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University,Department of Computer Science and Engineering,Shanghai,China","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University,Department of Computer Science and Engineering,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100328272","display_name":"Shuai Wang","orcid":"https://orcid.org/0000-0002-1595-3619"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuai Wang","raw_affiliation_strings":["MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University,Department of Computer Science and Engineering,Shanghai,China","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University,Department of Computer Science and Engineering,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074673170","display_name":"Houjun Huang","orcid":"https://orcid.org/0000-0003-0757-0949"},"institutions":[{"id":"https://openalex.org/I136848882","display_name":"Optech (Canada)","ror":"https://ror.org/03njz2k95","country_code":"CA","type":"company","lineage":["https://openalex.org/I136848882"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Houjun Huang","raw_affiliation_strings":["AISpeech Co., Ltd.,China","AISpeech Co., Ltd., China"],"affiliations":[{"raw_affiliation_string":"AISpeech Co., Ltd.,China","institution_ids":["https://openalex.org/I136848882"]},{"raw_affiliation_string":"AISpeech Co., Ltd., China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University,Department of Computer Science and Engineering,Shanghai,China","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University,Department of Computer Science and Engineering,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043098653","display_name":"Kai Yu","orcid":"https://orcid.org/0000-0002-7102-9826"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Yu","raw_affiliation_strings":["MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University,Department of Computer Science and Engineering,Shanghai,China","Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University,Department of Computer Science and Engineering,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, SpeechLab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055220405"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":10.1163,"has_fulltext":false,"cited_by_count":138,"citation_normalized_percentile":{"value":0.98479032,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1652","last_page":"1656"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.9612476825714111},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8351478576660156},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6951398253440857},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6923015713691711},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6197774410247803},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6162930727005005},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5995107889175415},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.59611976146698},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5873706936836243},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5802584290504456},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.5462583303451538},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5081226825714111},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.48591914772987366},{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.45577386021614075},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.22233453392982483}],"concepts":[{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.9612476825714111},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8351478576660156},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6951398253440857},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6923015713691711},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6197774410247803},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6162930727005005},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5995107889175415},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.59611976146698},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5873706936836243},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5802584290504456},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.5462583303451538},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5081226825714111},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.48591914772987366},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.45577386021614075},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22233453392982483}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc47483.2019.9023039","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023039","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7599999904632568}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W1589137271","https://openalex.org/W2114925438","https://openalex.org/W2150769028","https://openalex.org/W2516764878","https://openalex.org/W2520774990","https://openalex.org/W2584329820","https://openalex.org/W2587150483","https://openalex.org/W2609575245","https://openalex.org/W2622263826","https://openalex.org/W2726515241","https://openalex.org/W2747238065","https://openalex.org/W2748488820","https://openalex.org/W2784163702","https://openalex.org/W2794506738","https://openalex.org/W2808631503","https://openalex.org/W2888968865","https://openalex.org/W2890654588","https://openalex.org/W2890964092","https://openalex.org/W2899771611","https://openalex.org/W2916104401","https://openalex.org/W2962898354","https://openalex.org/W2963371159","https://openalex.org/W2963466847","https://openalex.org/W2969985801","https://openalex.org/W3101227480","https://openalex.org/W3103152812","https://openalex.org/W4289750118","https://openalex.org/W4301239768","https://openalex.org/W6726946684","https://openalex.org/W6748010250","https://openalex.org/W6748257384","https://openalex.org/W6756040250"],"related_works":["https://openalex.org/W4367154275","https://openalex.org/W2911303748","https://openalex.org/W2913125146","https://openalex.org/W4310030444","https://openalex.org/W4225630782","https://openalex.org/W4380994319","https://openalex.org/W4390823696","https://openalex.org/W2804941026","https://openalex.org/W4287163743","https://openalex.org/W3207800728"],"abstract_inverted_index":{"Recently,":[0],"speaker":[1,6,27,61,93,110],"embeddings":[2,56],"extracted":[3],"from":[4],"a":[5,53,84],"discriminative":[7,109],"deep":[8,92],"neural":[9],"network":[10],"(DNN)":[11],"yield":[12],"better":[13],"performance":[14],"than":[15],"the":[16,25,55,101,104,133],"conventional":[17],"methods":[18],"such":[19],"as":[20],"i-vector.":[21],"In":[22,64],"most":[23],"cases,":[24],"DNN":[26],"classifier":[28],"is":[29,103],"trained":[30],"using":[31,153],"cross":[32,154],"entropy":[33,155],"loss":[34,41,156],"with":[35,136,157],"softmax.":[36],"However,":[37],"this":[38,65,69],"kind":[39],"of":[40],"function":[42],"does":[43],"not":[44,58,77],"explicitly":[45],"encourage":[46],"inter-class":[47],"separability":[48],"and":[49,122,166],"intra-class":[50],"compactness.":[51],"As":[52],"result,":[54],"are":[57,89,113],"optimal":[59],"for":[60],"recognition":[62],"tasks.":[63],"paper,":[66],"to":[67,91,106,150],"address":[68],"issue,":[70],"three":[71],"different":[72],"margin":[73,86,102],"based":[74],"losses":[75],"which":[76],"only":[78],"separate":[79],"classes":[80,88],"but":[81],"also":[82],"demand":[83],"fixed":[85],"between":[87],"introduced":[90],"embedding":[94],"learning.":[95],"It":[96],"could":[97],"be":[98],"demonstrated":[99],"that":[100],"key":[105],"obtain":[107],"more":[108],"embeddings.":[111],"Experiments":[112],"conducted":[114],"on":[115,145,162,169],"two":[116],"public":[117],"text":[118],"independent":[119],"tasks:":[120],"VoxCeleb1":[121,163],"Speaker":[123],"in":[124],"The":[125,128],"Wild":[126],"(SITW).":[127],"proposed":[129],"approach":[130],"can":[131],"achieve":[132],"state-of-the-art":[134],"performance,":[135],"25%":[137],"~":[138],"30%":[139],"equal":[140],"error":[141],"rate":[142],"(EER)":[143],"reduction":[144],"both":[146],"tasks":[147],"when":[148],"compared":[149],"strong":[151],"baselines":[152],"softmax,":[158],"obtaining":[159],"2.238%":[160],"EER":[161,168],"test":[164,172],"set":[165],"2.761%":[167],"SITW":[170],"core-core":[171],"set,":[173],"respectively.":[174]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":19},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":29},{"year":2022,"cited_by_count":24},{"year":2021,"cited_by_count":28},{"year":2020,"cited_by_count":17},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
