{"id":"https://openalex.org/W2137821595","doi":"https://doi.org/10.1109/icassp.2009.4960562","title":"Comparing maximum a posteriori vector quantization and Gaussian mixture models in speaker verification","display_name":"Comparing maximum a posteriori vector quantization and Gaussian mixture models in speaker verification","publication_year":2009,"publication_date":"2009-04-01","ids":{"openalex":"https://openalex.org/W2137821595","doi":"https://doi.org/10.1109/icassp.2009.4960562","mag":"2137821595"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2009.4960562","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2009.4960562","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 IEEE International Conference on Acoustics, Speech and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043168931","display_name":"Tomi Kinnunen","orcid":"https://orcid.org/0000-0002-4371-7322"},"institutions":[{"id":"https://openalex.org/I4210106278","display_name":"Joensuu Science Park","ror":"https://ror.org/01j45tp19","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210106278"]},{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Tomi Kinnunen","raw_affiliation_strings":["Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND"],"affiliations":[{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","institution_ids":["https://openalex.org/I4210106278"]},{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND","institution_ids":["https://openalex.org/I175532246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008038540","display_name":"Juhani Saastamoinen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210106278","display_name":"Joensuu Science Park","ror":"https://ror.org/01j45tp19","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210106278"]},{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Juhani Saastamoinen","raw_affiliation_strings":["Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND"],"affiliations":[{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","institution_ids":["https://openalex.org/I4210106278"]},{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND","institution_ids":["https://openalex.org/I175532246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037259225","display_name":"Ville Hautam\u00e4ki","orcid":"https://orcid.org/0000-0002-5885-0003"},"institutions":[{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]},{"id":"https://openalex.org/I4210106278","display_name":"Joensuu Science Park","ror":"https://ror.org/01j45tp19","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210106278"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Ville Hautamaki","raw_affiliation_strings":["Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND"],"affiliations":[{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","institution_ids":["https://openalex.org/I4210106278"]},{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND","institution_ids":["https://openalex.org/I175532246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034498876","display_name":"Mikko Vinni","orcid":null},"institutions":[{"id":"https://openalex.org/I4210106278","display_name":"Joensuu Science Park","ror":"https://ror.org/01j45tp19","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210106278"]},{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Mikko Vinni","raw_affiliation_strings":["Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND"],"affiliations":[{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","institution_ids":["https://openalex.org/I4210106278"]},{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND","institution_ids":["https://openalex.org/I175532246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037846871","display_name":"Pasi Fr\u00e4nti","orcid":"https://orcid.org/0000-0002-9554-2827"},"institutions":[{"id":"https://openalex.org/I4210106278","display_name":"Joensuu Science Park","ror":"https://ror.org/01j45tp19","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210106278"]},{"id":"https://openalex.org/I175532246","display_name":"University of Eastern Finland","ror":"https://ror.org/00cyydd11","country_code":"FI","type":"education","lineage":["https://openalex.org/I175532246"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Pasi Franti","raw_affiliation_strings":["Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND"],"affiliations":[{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Department of Computer Science and Statistics, University of Joensuu, Joensuu, Finland","institution_ids":["https://openalex.org/I4210106278"]},{"raw_affiliation_string":"Speech and Image Processing Unit (SIPU), Dept. of Computer Science and Statistics University of Joensuu, P.O. Box 111, FI-80101, FINLAND","institution_ids":["https://openalex.org/I175532246"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5043168931"],"corresponding_institution_ids":["https://openalex.org/I175532246","https://openalex.org/I4210106278"],"apc_list":null,"apc_paid":null,"fwci":1.8065,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.88102599,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"15","issue":null,"first_page":"4229","last_page":"4232"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8349984884262085},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.715953528881073},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.6820200681686401},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.6579195261001587},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6217537522315979},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5884815454483032},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5865949988365173},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5378095507621765},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5120861530303955},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5069465637207031},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.49732235074043274},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.47053346037864685},{"id":"https://openalex.org/keywords/maximum-likelihood","display_name":"Maximum likelihood","score":0.4440738260746002},{"id":"https://openalex.org/keywords/maximum-a-posteriori-estimation","display_name":"Maximum a posteriori estimation","score":0.4326710104942322},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3641946017742157},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2902825176715851},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.17550262808799744}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8349984884262085},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.715953528881073},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.6820200681686401},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.6579195261001587},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6217537522315979},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5884815454483032},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5865949988365173},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5378095507621765},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5120861530303955},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5069465637207031},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.49732235074043274},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.47053346037864685},{"id":"https://openalex.org/C49781872","wikidata":"https://www.wikidata.org/wiki/Q1045555","display_name":"Maximum likelihood","level":2,"score":0.4440738260746002},{"id":"https://openalex.org/C9810830","wikidata":"https://www.wikidata.org/wiki/Q635384","display_name":"Maximum a posteriori estimation","level":3,"score":0.4326710104942322},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3641946017742157},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2902825176715851},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.17550262808799744},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icassp.2009.4960562","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2009.4960562","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 IEEE International Conference on Acoustics, Speech and Signal Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.1023.6995","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1023.6995","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://www.researchgate.net/profile/Tomi_Kinnunen/publication/220733877_Comparing_maximum_a_posteriori_vector_quantization_and_Gaussian_mixture_models_in_speaker_verification/links/0fcfd510a54314349c000000.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.1033.8177","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1033.8177","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://mirlab.org/conference_papers/International_Conference/ICASSP%202009/pdfs/0004229.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.144.3921","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.144.3921","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cs.joensuu.fi/pages/tkinnu/webpage/pdf/ICASSP2009_whichclassifier.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7200000286102295,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W98988881","https://openalex.org/W1945356021","https://openalex.org/W1993482042","https://openalex.org/W2041823554","https://openalex.org/W2078953162","https://openalex.org/W2136879537","https://openalex.org/W2137901633","https://openalex.org/W2147147599","https://openalex.org/W2149283971"],"related_works":["https://openalex.org/W2018623685","https://openalex.org/W2138195696","https://openalex.org/W66821593","https://openalex.org/W2545295736","https://openalex.org/W1521299571","https://openalex.org/W1197719229","https://openalex.org/W2381158726","https://openalex.org/W2316548414","https://openalex.org/W4235705411","https://openalex.org/W1516392727"],"abstract_inverted_index":{"Gaussian":[0],"mixture":[1],"model":[2,6,21,62],"-":[3],"universal":[4],"background":[5],"(GMM-UBM)":[7],"is":[8,80,91,107],"a":[9,19,45,51,77],"standard":[10,46],"reference":[11,52],"classifier":[12,48],"in":[13],"speaker":[14],"verification.":[15],"We":[16,54],"have":[17],"proposed":[18],"simplified":[20],"using":[22],"vector":[23],"quantization":[24],"(VQ-UBM).":[25],"In":[26],"this":[27],"study,":[28],"we":[29],"extensively":[30],"compare":[31],"these":[32],"two":[33],"classifiers":[34],"on":[35,56],"NIST":[36],"2005,":[37],"2006":[38],"and":[39,64,109],"2008":[40],"SRE":[41],"corpora,":[42],"while":[43],"having":[44],"discriminative":[47],"(GLDS-SVM)":[49],"as":[50],"point.":[53],"focus":[55],"parameter":[57],"setting":[58],"for":[59,66,86,93],"N-top":[60],"scoring,":[61],"order,":[63],"performance":[65],"different":[67],"amounts":[68],"of":[69,104],"training":[70,103],"data.":[71],"The":[72,96],"most":[73],"interesting":[74],"result,":[75],"against":[76],"general":[78],"belief,":[79],"that":[81,100],"GMM-UBM":[82],"yields":[83],"better":[84],"results":[85,97],"short":[87],"segments":[88],"whereas":[89],"VQ-UBM":[90],"good":[92],"long":[94],"utterances.":[95],"also":[98],"suggest":[99],"maximum":[101],"likelihood":[102],"the":[105,115],"UBM":[106,116],"sub-optimal,":[108],"hence,":[110],"alternative":[111],"ways":[112],"to":[113],"train":[114],"should":[117],"be":[118],"considered.":[119]},"counts_by_year":[{"year":2015,"cited_by_count":3},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
