{"id":"https://openalex.org/W2972892026","doi":"https://doi.org/10.21437/interspeech.2019-2208","title":"An End-to-End Text-Independent Speaker Verification Framework with a Keyword Adversarial Network","display_name":"An End-to-End Text-Independent Speaker Verification Framework with a Keyword Adversarial Network","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972892026","doi":"https://doi.org/10.21437/interspeech.2019-2208","mag":"2972892026"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-2208","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2208","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091430620","display_name":"Sungrack Yun","orcid":"https://orcid.org/0000-0003-2462-3854"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sungrack Yun","raw_affiliation_strings":["Qualcomm AI Research \u2020 , Qualcomm Korea YH"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qualcomm AI Research \u2020 , Qualcomm Korea YH","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077458889","display_name":"Janghoon Cho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Janghoon Cho","raw_affiliation_strings":["Qualcomm AI Research \u2020 , Qualcomm Korea YH"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qualcomm AI Research \u2020 , Qualcomm Korea YH","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079140215","display_name":"Jungyun Eum","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jungyun Eum","raw_affiliation_strings":["Qualcomm AI Research \u2020 , Qualcomm Korea YH"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qualcomm AI Research \u2020 , Qualcomm Korea YH","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038745666","display_name":"Wonil Chang","orcid":"https://orcid.org/0000-0002-4457-4917"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wonil Chang","raw_affiliation_strings":["Qualcomm AI Research \u2020 , Qualcomm Korea YH"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qualcomm AI Research \u2020 , Qualcomm Korea YH","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089149225","display_name":"Kyuwoong Hwang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kyuwoong Hwang","raw_affiliation_strings":["Qualcomm AI Research \u2020 , Qualcomm Korea YH"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qualcomm AI Research \u2020 , Qualcomm Korea YH","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.723,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.78943298,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2923","last_page":"2927"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7757816910743713},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7586292028427124},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.6870143413543701},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6687266826629639},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6650480031967163},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.6102524995803833},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5265156030654907},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.4897291958332062},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.47063398361206055},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.44519150257110596},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4393150806427002},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.4238028824329376},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33622634410858154},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3255879878997803}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7757816910743713},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7586292028427124},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.6870143413543701},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6687266826629639},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6650480031967163},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.6102524995803833},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5265156030654907},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.4897291958332062},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.47063398361206055},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.44519150257110596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4393150806427002},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.4238028824329376},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33622634410858154},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3255879878997803},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2019-2208","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2208","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.699999988079071}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1731081199","https://openalex.org/W1836465849","https://openalex.org/W1965819578","https://openalex.org/W2038101708","https://openalex.org/W2081074144","https://openalex.org/W2114925438","https://openalex.org/W2150769028","https://openalex.org/W2194775991","https://openalex.org/W2302255633","https://openalex.org/W2515837905","https://openalex.org/W2597757402","https://openalex.org/W2622203030","https://openalex.org/W2713640100","https://openalex.org/W2747249235","https://openalex.org/W2748488820","https://openalex.org/W2750499125","https://openalex.org/W2787051072","https://openalex.org/W2799674897","https://openalex.org/W2890964092","https://openalex.org/W2922299546","https://openalex.org/W2962824004","https://openalex.org/W2963043030","https://openalex.org/W2963341071","https://openalex.org/W2963684088","https://openalex.org/W3208728924","https://openalex.org/W4298998200","https://openalex.org/W4320013936"],"related_works":["https://openalex.org/W112947718","https://openalex.org/W66821593","https://openalex.org/W1521299571","https://openalex.org/W1998549096","https://openalex.org/W4312095835","https://openalex.org/W204267554","https://openalex.org/W2134501921","https://openalex.org/W4252590334","https://openalex.org/W2543777506","https://openalex.org/W2407001684"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"an":[3,28],"end-to-end":[4],"text-independent":[5,77],"speaker":[6,13,34,55,78,93,115,126,143],"verification":[7,56,127,144],"framework":[8,128,145],"by":[9],"jointly":[10],"considering":[11],"the":[12,33,37,41,47,51,61,69,81,84,87,91,104,108,111,114,121,130,137,156],"embedding":[14,29,79,88,116],"(SE)":[15],"network":[16,24,43,71],"and":[17,65,76,132,136,151],"automatic":[18],"speech":[19],"recognition":[20],"(ASR)":[21],"network.The":[22],"SE":[23],"learns":[25,44],"to":[26,45,72,155],"output":[27],"vector":[30,117],"which":[31],"distinguishes":[32],"characteristics":[35],"of":[36,50,68,90,98,107,113],"input":[38],"utterance,":[39],"while":[40,96],"ASR":[42,70,109],"recognize":[46],"phonetic":[48],"context":[49],"input.In":[52],"training":[53],"our":[54,125,142],"framework,":[57],"we":[58,123],"consider":[59],"both":[60],"triplet":[62,82],"loss":[63],"minimization":[64],"adversarial":[66,105],"gradient":[67,106],"obtain":[73],"more":[74],"discriminative":[75],"vectors.With":[80],"loss,":[83],"distances":[85],"between":[86],"vectors":[89],"same":[92],"are":[94,101],"minimized":[95],"those":[97],"different":[99],"speakers":[100],"maximized.Also,":[102],"with":[103],"network,":[110],"text-dependency":[112],"can":[118],"be":[119],"reduced.In":[120],"experiments,":[122],"evaluated":[124],"using":[129],"LibriSpeech":[131],"CHiME":[133],"2013":[134],"dataset,":[135],"evaluation":[138],"results":[139],"show":[140],"that":[141],"shows":[146],"lower":[147],"equal":[148],"error":[149],"rate":[150],"better":[152],"textindependency":[153],"compared":[154],"other":[157],"approaches.":[158]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
