{"id":"https://openalex.org/W1977331234","doi":"https://doi.org/10.1109/tasl.2012.2191960","title":"Low-Variance Multitaper MFCC Features: A Case Study in Robust Speaker Verification","display_name":"Low-Variance Multitaper MFCC Features: A Case Study in Robust Speaker Verification","publication_year":2012,"publication_date":"2012-04-11","ids":{"openalex":"https://openalex.org/W1977331234","doi":"https://doi.org/10.1109/tasl.2012.2191960","mag":"1977331234"},"language":"en","primary_location":{"id":"doi:10.1109/tasl.2012.2191960","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2012.2191960","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043168931","display_name":"Tomi Kinnunen","orcid":"https://orcid.org/0000-0002-4371-7322"},"institutions":[{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Tomi Kinnunen","raw_affiliation_strings":["School of Computing, University of Eastern Finland, Joensuu, Finland"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Eastern Finland, Joensuu, Finland","institution_ids":["https://openalex.org/I175532246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023224767","display_name":"Rahim Saeidi","orcid":"https://orcid.org/0000-0002-9084-0091"},"institutions":[{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]},{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["FI","NL"],"is_corresponding":false,"raw_author_name":"Rahim Saeidi","raw_affiliation_strings":["Radboud University Nijmegen, Nijmegen, Netherlands","School of Computing, University of Eastern Finland, Joensuu, Finland"],"affiliations":[{"raw_affiliation_string":"Radboud University Nijmegen, Nijmegen, Netherlands","institution_ids":["https://openalex.org/I145872427"]},{"raw_affiliation_string":"School of Computing, University of Eastern Finland, Joensuu, Finland","institution_ids":["https://openalex.org/I175532246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071435229","display_name":"Filip Sedlak","orcid":null},"institutions":[{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Filip Sedlak","raw_affiliation_strings":["School of Computing, University of Eastern Finland, Joensuu, Finland"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Eastern Finland, Joensuu, Finland","institution_ids":["https://openalex.org/I175532246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004287909","display_name":"Kong Aik Lee","orcid":"https://orcid.org/0000-0001-9133-3000"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Kong Aik Lee","raw_affiliation_strings":["Human Language Technology, Institute for Infocomm Research (I2R), Singapore"],"affiliations":[{"raw_affiliation_string":"Human Language Technology, Institute for Infocomm Research (I2R), Singapore","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059166884","display_name":"Johan Sandberg","orcid":"https://orcid.org/0000-0002-4481-3367"},"institutions":[{"id":"https://openalex.org/I187531555","display_name":"Lund University","ror":"https://ror.org/012a77v79","country_code":"SE","type":"education","lineage":["https://openalex.org/I187531555"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Johan Sandberg","raw_affiliation_strings":["Centre for Mathematical Sciences, Lund University, Lund, Sweden"],"affiliations":[{"raw_affiliation_string":"Centre for Mathematical Sciences, Lund University, Lund, Sweden","institution_ids":["https://openalex.org/I187531555"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013815510","display_name":"Maria Hansson-Sandsten","orcid":null},"institutions":[{"id":"https://openalex.org/I187531555","display_name":"Lund University","ror":"https://ror.org/012a77v79","country_code":"SE","type":"education","lineage":["https://openalex.org/I187531555"]},{"id":"https://openalex.org/I1279596006","display_name":"Statistics Sweden","ror":"https://ror.org/05x7wz523","country_code":"SE","type":"government","lineage":["https://openalex.org/I1279596006"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Maria Hansson-Sandsten","raw_affiliation_strings":["Mathematical Statistics, Centre for Mathematical Sciences, Lund University, Lund, Sweden"],"affiliations":[{"raw_affiliation_string":"Mathematical Statistics, Centre for Mathematical Sciences, Lund University, Lund, Sweden","institution_ids":["https://openalex.org/I1279596006","https://openalex.org/I187531555"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["Human Language Technology, Institute for Infocomm Research (I2R), Singapore"],"affiliations":[{"raw_affiliation_string":"Human Language Technology, Institute for Infocomm Research (I2R), Singapore","institution_ids":["https://openalex.org/I3005327000"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5043168931"],"corresponding_institution_ids":["https://openalex.org/I175532246"],"apc_list":null,"apc_paid":null,"fwci":12.1344,"has_fulltext":false,"cited_by_count":128,"citation_normalized_percentile":{"value":0.99298667,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"20","issue":"7","first_page":"1990","last_page":"2001"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multitaper","display_name":"Multitaper","score":0.9756867289543152},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.7540891170501709},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7398515939712524},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.666800856590271},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.6173483729362488},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.592542290687561},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.5276374220848083},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4828908443450928},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4753410816192627},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.462752103805542},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.4209856688976288},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.2987360656261444},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.28511396050453186},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.18443936109542847},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1826275885105133}],"concepts":[{"id":"https://openalex.org/C2777067715","wikidata":"https://www.wikidata.org/wiki/Q3327726","display_name":"Multitaper","level":2,"score":0.9756867289543152},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.7540891170501709},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7398515939712524},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.666800856590271},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.6173483729362488},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.592542290687561},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.5276374220848083},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4828908443450928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4753410816192627},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.462752103805542},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.4209856688976288},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.2987360656261444},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.28511396050453186},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.18443936109542847},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1826275885105133},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tasl.2012.2191960","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2012.2191960","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.221.7050","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.221.7050","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cs.joensuu.fi/pages/tkinnu/webpage/pdf/multitaperMFCC_IEEE_TASLP_doublecolumn.pdf","raw_type":"text"},{"id":"pmh:oai:lup.lub.lu.se:e055865e-5dbc-4a3f-8a32-1629664eca7f","is_oa":false,"landing_page_url":"https://lup.lub.lu.se/record/2826390","pdf_url":null,"source":{"id":"https://openalex.org/S4306400536","display_name":"Lund University Publications (Lund University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I187531555","host_organization_name":"Lund University","host_organization_lineage":["https://openalex.org/I187531555"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN: 1558-7924","raw_type":"text"},{"id":"pmh:oai:repository.ubn.ru.nl:2066/107636","is_oa":false,"landing_page_url":"http://hdl.handle.net/2066/107636","pdf_url":null,"source":{"id":"https://openalex.org/S4306401067","display_name":"Radboud Repository (Radboud University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145872427","host_organization_name":"Radboud University Nijmegen","host_organization_lineage":["https://openalex.org/I145872427"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing, 20, 7, pp. 1990-2001","raw_type":"Article / Letter to editor"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321013","display_name":"Radboud Universiteit","ror":"https://ror.org/016xsfp80"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W193404450","https://openalex.org/W204053250","https://openalex.org/W1495679096","https://openalex.org/W1578856370","https://openalex.org/W1586405805","https://openalex.org/W1605802458","https://openalex.org/W1760200923","https://openalex.org/W1966264494","https://openalex.org/W2041823554","https://openalex.org/W2044054970","https://openalex.org/W2052667477","https://openalex.org/W2078953162","https://openalex.org/W2084410249","https://openalex.org/W2092305100","https://openalex.org/W2099654254","https://openalex.org/W2100764393","https://openalex.org/W2103651164","https://openalex.org/W2106822551","https://openalex.org/W2106915859","https://openalex.org/W2107601051","https://openalex.org/W2107638917","https://openalex.org/W2111460811","https://openalex.org/W2112582577","https://openalex.org/W2119468495","https://openalex.org/W2121389521","https://openalex.org/W2126693545","https://openalex.org/W2127180234","https://openalex.org/W2131475987","https://openalex.org/W2134044554","https://openalex.org/W2135027063","https://openalex.org/W2136879537","https://openalex.org/W2137075158","https://openalex.org/W2140024381","https://openalex.org/W2144053378","https://openalex.org/W2144992143","https://openalex.org/W2147147599","https://openalex.org/W2148154194","https://openalex.org/W2148554390","https://openalex.org/W2149338142","https://openalex.org/W2150769028","https://openalex.org/W2152395643","https://openalex.org/W2154278880","https://openalex.org/W2155722796","https://openalex.org/W2166223208","https://openalex.org/W2168175751","https://openalex.org/W2168561756","https://openalex.org/W2172101055","https://openalex.org/W2263224228","https://openalex.org/W2295184593","https://openalex.org/W2397634864","https://openalex.org/W2406587520","https://openalex.org/W4245919820","https://openalex.org/W6607843732","https://openalex.org/W6635268374","https://openalex.org/W6712325649"],"related_works":["https://openalex.org/W2137997377","https://openalex.org/W2018086531","https://openalex.org/W1980297060","https://openalex.org/W2387604097","https://openalex.org/W2787035864","https://openalex.org/W2373675101","https://openalex.org/W2048014685","https://openalex.org/W2370972896","https://openalex.org/W106160982","https://openalex.org/W2359140082"],"abstract_inverted_index":{"In":[0,70],"speech":[1,62],"and":[2,94,112,133,150,175],"audio":[3],"applications,":[4],"short-term":[5],"signal":[6],"spectrum":[7,32],"is":[8,42],"often":[9],"represented":[10],"using":[11,96],"mel-frequency":[12],"cepstral":[13],"coefficients":[14],"(MFCCs)":[15],"computed":[16],"from":[17],"a":[18,82,183],"windowed":[19,40,144],"discrete":[20],"Fourier":[21],"transform":[22],"(DFT).":[23],"Windowing":[24],"reduces":[25,164],"spectral":[26],"leakage":[27],"but":[28],"variance":[29,95],"of":[30,91],"the":[31,43,75,101,109,142,154,169,188],"estimate":[33],"remains":[34],"high.":[35],"An":[36],"elegant":[37],"extension":[38],"to":[39],"DFT":[41,145],"so-called":[44],"multitaper":[45,76,180],"method":[46,77],"which":[47],"uses":[48],"multiple":[49],"time-domain":[50],"windows":[51],"(tapers)":[52],"with":[53,81,124],"frequency-domain":[54],"averaging.":[55],"Multitapers":[56,138],"have":[57],"received":[58],"little":[59],"attention":[60],"in":[61,157],"processing":[63],"even":[64],"though":[65],"they":[66],"produce":[67],"low-variance":[68],"features.":[69],"this":[71],"paper,":[72],"we":[73,116],"propose":[74],"for":[78,186],"MFCC":[79,92],"extraction":[80],"practical":[83],"focus.":[84],"We":[85],"provide,":[86],"first,":[87],"detailed":[88],"statistical":[89],"analysis":[90,136],"bias":[93],"autoregressive":[97],"process":[98],"simulations":[99],"on":[100,108,153,168],"TIMIT":[102],"corpus.":[103],"For":[104],"speaker":[105],"verification":[106],"experiments":[107],"NIST":[110,158],"2002":[111],"2008":[113],"SRE":[114],"corpora,":[115],"consider":[117],"three":[118],"Gaussian":[119],"mixture":[120],"model":[121,127],"based":[122],"classifiers":[123],"universal":[125],"background":[126],"(GMM-UBM),":[128],"support":[129],"vector":[130],"machine":[131],"(GMM-SVM)":[132,149],"joint":[134],"factor":[135],"(GMM-JFA).":[137],"improve":[139],"MinDCF":[140,165],"over":[141],"baseline":[143],"by":[146,166],"relative":[147],"20.4%":[148],"13.7%":[151],"(GMM-JFA)":[152],"interview-interview":[155],"condition":[156],"2008.":[159],"The":[160],"GMM-JFA":[161],"system":[162],"further":[163],"18.7%":[167],"telephone":[170],"data.":[171],"With":[172],"these":[173],"improvements":[174],"generally":[176],"noncritical":[177],"parameter":[178],"selection,":[179],"MFCCs":[181],"are":[182],"viable":[184],"candidate":[185],"replacing":[187],"conventional":[189],"MFCCs.":[190]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":11},{"year":2015,"cited_by_count":17},{"year":2014,"cited_by_count":16},{"year":2013,"cited_by_count":13},{"year":2012,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
