{"id":"https://openalex.org/W2281877999","doi":"https://doi.org/10.21437/odyssey.2016-25","title":"The IBM 2016 Speaker Recognition System","display_name":"The IBM 2016 Speaker Recognition System","publication_year":2016,"publication_date":"2016-06-21","ids":{"openalex":"https://openalex.org/W2281877999","doi":"https://doi.org/10.21437/odyssey.2016-25","mag":"2281877999"},"language":"en","primary_location":{"id":"doi:10.21437/odyssey.2016-25","is_oa":false,"landing_page_url":"https://doi.org/10.21437/odyssey.2016-25","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Speaker and Language Recognition Workshop (Odyssey 2016)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1602.07291","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051167459","display_name":"Seyed Omid Sadjadi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Seyed Omid Sadjadi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002536077","display_name":"Sriram Ganapathy","orcid":"https://orcid.org/0000-0002-5779-9066"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sriram Ganapathy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5029030495","display_name":"Jason Pelecanos","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jason Pelecanos","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5051167459"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.0945,"has_fulltext":true,"cited_by_count":22,"citation_normalized_percentile":{"value":0.96940426,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"174","last_page":"180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7756485939025879},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.75864577293396},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7585537433624268},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.7057691812515259},{"id":"https://openalex.org/keywords/linear-discriminant-analysis","display_name":"Linear discriminant analysis","score":0.6445629596710205},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5308236479759216},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5277305841445923},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.5108856558799744},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.4933535158634186},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4770597517490387},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.4674537777900696},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42162972688674927},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.41090816259384155},{"id":"https://openalex.org/keywords/sound-pressure","display_name":"Sound pressure","score":0.11296853423118591},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10193374752998352},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07934263348579407}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7756485939025879},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.75864577293396},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7585537433624268},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.7057691812515259},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.6445629596710205},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5308236479759216},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5277305841445923},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.5108856558799744},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.4933535158634186},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4770597517490387},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.4674537777900696},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42162972688674927},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.41090816259384155},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.11296853423118591},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10193374752998352},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07934263348579407},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/odyssey.2016-25","is_oa":false,"landing_page_url":"https://doi.org/10.21437/odyssey.2016-25","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Speaker and Language Recognition Workshop (Odyssey 2016)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1602.07291","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1602.07291","pdf_url":"https://arxiv.org/pdf/1602.07291","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2281877999","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1602.07291.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1602.07291","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1602.07291","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1602.07291","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1602.07291","pdf_url":"https://arxiv.org/pdf/1602.07291","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2281877999.pdf","grobid_xml":"https://content.openalex.org/works/W2281877999.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W97072897","https://openalex.org/W1502987634","https://openalex.org/W1770825568","https://openalex.org/W2002342963","https://openalex.org/W2023582935","https://openalex.org/W2039057510","https://openalex.org/W2085535170","https://openalex.org/W2107638917","https://openalex.org/W2121981798","https://openalex.org/W2155117693","https://openalex.org/W2158069733","https://openalex.org/W2186517251","https://openalex.org/W2290689761","https://openalex.org/W2395043307","https://openalex.org/W2395750323","https://openalex.org/W2397703486","https://openalex.org/W2916986993"],"related_works":["https://openalex.org/W2963068250","https://openalex.org/W2039057510","https://openalex.org/W2395750323","https://openalex.org/W2150769028","https://openalex.org/W2046056978","https://openalex.org/W1524333225","https://openalex.org/W2890964092","https://openalex.org/W2406312423","https://openalex.org/W2131734706","https://openalex.org/W1992796048","https://openalex.org/W42826071","https://openalex.org/W1548900215","https://openalex.org/W2584329820","https://openalex.org/W2520774990","https://openalex.org/W2194775991","https://openalex.org/W2982445724","https://openalex.org/W3200802588","https://openalex.org/W2969221484","https://openalex.org/W3037590244","https://openalex.org/W2408110232"],"abstract_inverted_index":{"In":[0,19],"this":[1,240],"paper":[2,241],"we":[3,21],"describe":[4],"the":[5,10,56,60,72,94,115,121,130,150,165,181,194,200,206,243,250],"recent":[6],"advancements":[7],"made":[8],"in":[9,30,120,160,187,199,217,239],"IBM":[11],"i-vector":[12,122,232],"speaker":[13,90,133,170,188],"recognition":[14,86,134,189],"system":[15],"for":[16,89,169],"conversational":[17],"speech.":[18],"particular,":[20],"identify":[22],"key":[23],"techniques":[24,40,128],"that":[25,49,66],"contribute":[26],"to":[27,52,113,156,175,212,223,247],"significant":[28],"improvements":[29,216],"performance":[31,218],"of":[32,55,74,96,108,162,196],"our":[33,229,235],"system,":[34,88],"and":[35,76,92,142,191],"quantify":[36],"their":[37],"contributions.":[38],"The":[39],"include:":[41],"1)":[42,149],"a":[43,97,105],"nearest-neighbor":[44],"discriminant":[45,63],"analysis":[46,64],"(NDA)":[47],"approach":[48],"is":[50,152],"formulated":[51],"alleviate":[53],"some":[54],"limitations":[57],"associated":[58],"with":[59,104],"conventional":[61],"linear":[62],"(LDA)":[65],"assumes":[67],"Gaussian":[68],"class-conditional":[69],"distributions,":[70],"2)":[71,172],"application":[73],"speaker-":[75],"channel-adapted":[77],"features,":[78],"which":[79],"are":[80],"derived":[81],"from":[82,210,221],"an":[83],"automatic":[84],"speech":[85],"(ASR)":[87],"recognition,":[91,171],"3)":[93,192],"use":[95],"deep":[98],"neural":[99],"network":[100],"(DNN)":[101],"acoustic":[102,177,202],"model":[103,203],"large":[106],"number":[107,195],"output":[109,197],"units":[110,198],"(~10k":[111],"senones)":[112],"compute":[114],"frame-level":[116],"soft":[117],"alignments":[118],"required":[119],"estimation":[123],"process.":[124],"We":[125],"evaluate":[126],"these":[127],"on":[129,249],"NIST":[131,251],"2010":[132,253],"evaluation":[135],"(SRE)":[136],"extended":[137,254],"core":[138,255],"conditions":[139],"involving":[140],"telephone":[141],"microphone":[143],"trials.":[144],"Experimental":[145],"results":[146,237],"indicate":[147],"that:":[148],"NDA":[151],"more":[153],"effective":[154],"(up":[155],"35%":[157],"relative":[158,225],"improvement":[159],"terms":[161],"EER)":[163],"than":[164],"traditional":[166],"parametric":[167],"LDA":[168],"when":[173],"compared":[174],"raw":[176],"features":[178,184],"(e.g.,":[179],"MFCCs),":[180],"ASR":[182],"speaker-adapted":[183],"provide":[185],"gains":[186,227],"performance,":[190],"increasing":[193,205],"DNN":[201],"(i.e.,":[204],"senone":[207],"set":[208],"size":[209],"2k":[211],"10k)":[213],"provides":[214],"consistent":[215],"(for":[219],"example":[220],"37%":[222],"57%":[224],"EER":[226],"over":[228],"baseline":[230],"GMM":[231],"system).":[233],"To":[234],"knowledge,":[236],"reported":[238],"represent":[242],"best":[244],"performances":[245],"published":[246],"date":[248],"SRE":[252],"tasks.":[256]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":2}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
