{"id":"https://openalex.org/W2802973008","doi":"https://doi.org/10.1109/taslp.2018.2831456","title":"Text-Independent Speaker Verification Based on Triplet Convolutional Neural Network Embeddings","display_name":"Text-Independent Speaker Verification Based on Triplet Convolutional Neural Network Embeddings","publication_year":2018,"publication_date":"2018-04-30","ids":{"openalex":"https://openalex.org/W2802973008","doi":"https://doi.org/10.1109/taslp.2018.2831456","mag":"2802973008"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2018.2831456","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2831456","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005304261","display_name":"Chunlei Zhang","orcid":"https://orcid.org/0000-0002-3851-2357"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chunlei Zhang","raw_affiliation_strings":["Center for Robust Speech Systems, The University of Texas at Dallas, Richardson, TX, USA"],"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084879161","display_name":"Kazuhito Koishida","orcid":"https://orcid.org/0000-0002-3111-5375"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kazuhito Koishida","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057910370","display_name":"John H. L. Hansen","orcid":"https://orcid.org/0000-0003-1382-9929"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John H. L. Hansen","raw_affiliation_strings":["Center for Robust Speech Systems, The University of Texas at Dallas, Richardson, TX, USA"],"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5005304261"],"corresponding_institution_ids":["https://openalex.org/I162577319"],"apc_list":null,"apc_paid":null,"fwci":18.4111,"has_fulltext":false,"cited_by_count":188,"citation_normalized_percentile":{"value":0.99317563,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"26","issue":"9","first_page":"1633","last_page":"1644"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7827085256576538},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6294381022453308},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5862223505973816},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.567436695098877},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.5656851530075073},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5582420825958252},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5525113940238953},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5509254336357117},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5463226437568665},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.4754045903682709},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.44712644815444946},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.44152674078941345},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.43102607131004333},{"id":"https://openalex.org/keywords/network-architecture","display_name":"Network architecture","score":0.42799803614616394},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4240444004535675},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4228900372982025}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7827085256576538},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6294381022453308},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5862223505973816},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.567436695098877},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.5656851530075073},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5582420825958252},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5525113940238953},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5509254336357117},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5463226437568665},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.4754045903682709},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.44712644815444946},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.44152674078941345},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.43102607131004333},{"id":"https://openalex.org/C193415008","wikidata":"https://www.wikidata.org/wiki/Q639681","display_name":"Network architecture","level":2,"score":0.42799803614616394},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4240444004535675},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4228900372982025},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2018.2831456","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2831456","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6499999761581421,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G8087301405","display_name":null,"funder_award_id":"FA8750-15-1-0205","funder_id":"https://openalex.org/F4320338294","funder_display_name":"Air Force Research Laboratory"}],"funders":[{"id":"https://openalex.org/F4320338294","display_name":"Air Force Research Laboratory","ror":"https://ror.org/02e2egq70"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W72757270","https://openalex.org/W1524333225","https://openalex.org/W1936725236","https://openalex.org/W2034626437","https://openalex.org/W2039057510","https://openalex.org/W2041823554","https://openalex.org/W2046056978","https://openalex.org/W2064364374","https://openalex.org/W2078169166","https://openalex.org/W2096733369","https://openalex.org/W2097117768","https://openalex.org/W2107638917","https://openalex.org/W2114925438","https://openalex.org/W2129066450","https://openalex.org/W2129244720","https://openalex.org/W2143612262","https://openalex.org/W2143694533","https://openalex.org/W2147147599","https://openalex.org/W2150769028","https://openalex.org/W2175575774","https://openalex.org/W2179352600","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2274287116","https://openalex.org/W2281877999","https://openalex.org/W2346850980","https://openalex.org/W2395750323","https://openalex.org/W2396385162","https://openalex.org/W2404617565","https://openalex.org/W2405505124","https://openalex.org/W2405524873","https://openalex.org/W2406312423","https://openalex.org/W2406366455","https://openalex.org/W2407374891","https://openalex.org/W2462543284","https://openalex.org/W2516985725","https://openalex.org/W2520774990","https://openalex.org/W2533523411","https://openalex.org/W2576165910","https://openalex.org/W2584329820","https://openalex.org/W2587150483","https://openalex.org/W2638067502","https://openalex.org/W2726515241","https://openalex.org/W2747238065","https://openalex.org/W2748488820","https://openalex.org/W2787051072","https://openalex.org/W2953309350","https://openalex.org/W2963068250","https://openalex.org/W2963702081","https://openalex.org/W2963921497","https://openalex.org/W2963947576","https://openalex.org/W2964228006","https://openalex.org/W2964350391","https://openalex.org/W3098722327","https://openalex.org/W3099206234","https://openalex.org/W4234330420","https://openalex.org/W6602923018","https://openalex.org/W6631362777","https://openalex.org/W6694260854","https://openalex.org/W6711901354","https://openalex.org/W6713383670","https://openalex.org/W6713727690","https://openalex.org/W6714259624","https://openalex.org/W6728841359"],"related_works":["https://openalex.org/W4366375373","https://openalex.org/W2743258233","https://openalex.org/W2970216048","https://openalex.org/W3202244193","https://openalex.org/W2043075591","https://openalex.org/W2758063741","https://openalex.org/W3186169793","https://openalex.org/W2517027266","https://openalex.org/W3091976719","https://openalex.org/W3160672713"],"abstract_inverted_index":{"The":[0],"effectiveness":[1,251],"of":[2,72,252],"introducing":[3],"deep":[4,35],"neural":[5,37,74,186],"networks":[6],"into":[7],"conventional":[8],"speaker":[9,23,50,77,144,175,190,255],"recognition":[10],"pipelines":[11],"has":[12],"been":[13],"broadly":[14],"shown":[15],"to":[16,104,166,211,217],"benefit":[17],"system":[18,103,192],"performance.":[19,221],"A":[20,69],"novel":[21],"text-independent":[22],"verification":[24],"(SV)":[25],"framework":[26],"based":[27,76,170,188],"on":[28,228],"the":[29,66,73,101,111,123,126,141,155,168,173,181,204,224,250],"triplet":[30,81],"loss":[31,82],"and":[32,59,96,132,193,199,242],"a":[33,48,62,117,185,194,218,233,237],"very":[34],"convolutional":[36],"network":[38,75,94,187],"architecture":[39],"(i.e.,":[40,232],"Inception-Resnet-v1)":[41],"are":[42,209],"investigated":[43],"in":[44,92,122,203],"this":[45],"study,":[46],"where":[47],"fixed-length":[49,127,142],"discriminative":[51,78],"embedding":[52,145,176,191,256],"is":[53,83,90,130,137,163],"learned":[54],"from":[55],"sparse":[56],"speech":[57,235,239],"features":[58],"utilized":[60],"as":[61],"feature":[63],"representation":[64],"for":[65,172],"SV":[67,97,102,182,226],"tasks.":[68],"concise":[70],"description":[71],"training":[79,95],"with":[80,116,140,149,184,223],"presented.":[84],"An":[85],"Euclidean":[86],"distance":[87,169],"similarity":[88],"metric":[89],"applied":[91],"both":[93],"testing,":[98],"which":[99],"ensures":[100],"follow":[105],"an":[106,133],"end-to-end":[107],"fashion.":[108],"By":[109],"replacing":[110],"final":[112],"max/average":[113],"pooling":[114,120],"layer":[115,121],"spatial":[118],"pyramid":[119],"Inception-Resnet-v1":[124],"architecture,":[125],"input":[128,143],"constraint":[129],"relaxed":[131],"obvious":[134],"performance":[135],"gain":[136],"achieved":[138],"compared":[139],"system.":[146,177,257],"For":[147],"datasets":[148,231],"more":[150],"severe":[151],"training/test":[152],"condition":[153],"mismatches,":[154],"probabilistic":[156],"linear":[157],"discriminant":[158],"analysis":[159],"(PLDA)":[160],"back":[161,205],"end":[162],"further":[164],"introduced":[165],"replace":[167],"scoring":[171],"proposed":[174,254],"Thus,":[178],"we":[179],"reconstruct":[180],"task":[183],"front-end":[189],"PLDA":[195],"that":[196,215],"provides":[197],"channel":[198],"noise":[200],"variabilities":[201],"compensation":[202],"end.":[206],"Extensive":[207],"experiments":[208],"conducted":[210],"provide":[212],"useful":[213],"hints":[214],"lead":[216],"better":[219],"testing":[220],"Comparison":[222],"state-of-the-art":[225],"frameworks":[227],"three":[229],"public":[230],"prompt":[234],"corpus,":[236,241],"conversational":[238],"Switchboard":[240],"NIST":[243],"SRE10":[244],"10":[245],"s-10":[246],"s":[247],"condition)":[248],"justifies":[249],"our":[253]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":20},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":26},{"year":2021,"cited_by_count":39},{"year":2020,"cited_by_count":41},{"year":2019,"cited_by_count":23},{"year":2018,"cited_by_count":6}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
