{"id":"https://openalex.org/W2143171927","doi":"https://doi.org/10.1109/icassp.2004.1326045","title":"Bayesian modelling of the speech spectrum using mixture of Gaussians","display_name":"Bayesian modelling of the speech spectrum using mixture of Gaussians","publication_year":2004,"publication_date":"2004-09-28","ids":{"openalex":"https://openalex.org/W2143171927","doi":"https://doi.org/10.1109/icassp.2004.1326045","mag":"2143171927"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2004.1326045","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2004.1326045","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2004 IEEE International Conference on Acoustics, Speech, and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049272790","display_name":"Parham Zolfaghari","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"P. Zolfaghari","raw_affiliation_strings":["Speech Open Laboratory, NTT Communication Science Laboratories, NTT Corporation, Soraku-gun, Kyoto, Japan","NTT Commun. Sci. Lab., NTT Corp., Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Speech Open Laboratory, NTT Communication Science Laboratories, NTT Corporation, Soraku-gun, Kyoto, Japan","institution_ids":["https://openalex.org/I2251713219"]},{"raw_affiliation_string":"NTT Commun. Sci. Lab., NTT Corp., Kyoto, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"S. Watanabe","raw_affiliation_strings":["Speech Open Laboratory, NTT Communication Science Laboratories, NTT Corporation, Soraku-gun, Kyoto, Japan","NTT Commun. Sci. Lab., NTT Corp., Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Speech Open Laboratory, NTT Communication Science Laboratories, NTT Corporation, Soraku-gun, Kyoto, Japan","institution_ids":["https://openalex.org/I2251713219"]},{"raw_affiliation_string":"NTT Commun. Sci. Lab., NTT Corp., Kyoto, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018620798","display_name":"Atsushi Nakamura","orcid":"https://orcid.org/0000-0003-0788-2221"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"A. Nakamura","raw_affiliation_strings":["Speech Open Laboratory, NTT Communication Science Laboratories, NTT Corporation, Soraku-gun, Kyoto, Japan","NTT Commun. Sci. Lab., NTT Corp., Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Speech Open Laboratory, NTT Communication Science Laboratories, NTT Corporation, Soraku-gun, Kyoto, Japan","institution_ids":["https://openalex.org/I2251713219"]},{"raw_affiliation_string":"NTT Commun. Sci. Lab., NTT Corp., Kyoto, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027443847","display_name":"Shigeru Katagiri","orcid":"https://orcid.org/0000-0003-4738-5385"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"S. Katagiri","raw_affiliation_strings":["Speech Open Laboratory, NTT Communication Science Laboratories, NTT Corporation, Soraku-gun, Kyoto, Japan","NTT Commun. Sci. Lab., NTT Corp., Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Speech Open Laboratory, NTT Communication Science Laboratories, NTT Corporation, Soraku-gun, Kyoto, Japan","institution_ids":["https://openalex.org/I2251713219"]},{"raw_affiliation_string":"NTT Commun. Sci. Lab., NTT Corp., Kyoto, Japan","institution_ids":["https://openalex.org/I2251713219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5049272790"],"corresponding_institution_ids":["https://openalex.org/I2251713219"],"apc_list":null,"apc_paid":null,"fwci":2.2434,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.88631216,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"1","issue":null,"first_page":"I","last_page":"553"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.8973149657249451},{"id":"https://openalex.org/keywords/histogram","display_name":"Histogram","score":0.6932955384254456},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5114563703536987},{"id":"https://openalex.org/keywords/envelope","display_name":"Envelope (radar)","score":0.4917462170124054},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.49103283882141113},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.48789143562316895},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.48153573274612427},{"id":"https://openalex.org/keywords/formant","display_name":"Formant","score":0.4475501477718353},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4468497037887573},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41496530175209045},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.4127134680747986},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4019745886325836},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.34954142570495605},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3079812824726105},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11585751175880432},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1004118025302887}],"concepts":[{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.8973149657249451},{"id":"https://openalex.org/C53533937","wikidata":"https://www.wikidata.org/wiki/Q185020","display_name":"Histogram","level":3,"score":0.6932955384254456},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5114563703536987},{"id":"https://openalex.org/C65155139","wikidata":"https://www.wikidata.org/wiki/Q5380912","display_name":"Envelope (radar)","level":3,"score":0.4917462170124054},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.49103283882141113},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48789143562316895},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.48153573274612427},{"id":"https://openalex.org/C158215666","wikidata":"https://www.wikidata.org/wiki/Q1414685","display_name":"Formant","level":3,"score":0.4475501477718353},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4468497037887573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41496530175209045},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.4127134680747986},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4019745886325836},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34954142570495605},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3079812824726105},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11585751175880432},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1004118025302887},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C2779581591","wikidata":"https://www.wikidata.org/wiki/Q36244","display_name":"Vowel","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2004.1326045","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2004.1326045","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2004 IEEE International Conference on Acoustics, Speech, and Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4000000059604645,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W196142488","https://openalex.org/W1618600317","https://openalex.org/W1926768285","https://openalex.org/W2105658140","https://openalex.org/W2106498461","https://openalex.org/W6607955727","https://openalex.org/W6636504819"],"related_works":["https://openalex.org/W2046217390","https://openalex.org/W4365503437","https://openalex.org/W2362995206","https://openalex.org/W2004825552","https://openalex.org/W1986672430","https://openalex.org/W1990629684","https://openalex.org/W1971436356","https://openalex.org/W37481168","https://openalex.org/W1992295166","https://openalex.org/W2143508933"],"abstract_inverted_index":{"This":[0,74],"paper":[1,137],"presents":[2],"a":[3,12,28,77],"method":[4],"for":[5,23,68],"modelling":[6,26,116,140],"the":[7,31,55,83,97,145,161,169],"speech":[8],"spectral":[9,57,115,121,170],"envelope":[10],"using":[11,70,105,120,141],"mixture":[13,25,98,146],"of":[14,27,33,54,62,96,132,151,160,163],"Gaussians":[15,143,152,164],"(MoG).":[16],"A":[17,51,109],"novel":[18],"variational":[19,92],"Bayesian":[20,93],"(VB)":[21,94],"framework":[22],"Gaussian":[24],"histogram":[29,52,101],"enables":[30],"derivation":[32],"an":[34,106,157],"objective":[35,158],"function":[36],"that":[37,80],"can":[38],"be":[39],"used":[40,67],"to":[41,166],"simultaneously":[42],"optimise":[43],"both":[44],"model":[45,49,99],"parameter":[46],"distributions":[47],"and":[48,91,113,124,153,156],"structure.":[50],"representation":[53],"STRAIGHT":[56],"envelope,":[58],"which":[59],"is":[60,66,117],"free":[61],"glottal":[63],"excitation":[64],"information,":[65],"parametrisation":[69],"this":[71,136],"MoG":[72],"model.":[73],"results":[75],"in":[76,135,144,148],"parameterisation":[78],"scheme":[79],"purely":[81],"models":[82],"vocal":[84],"tract":[85],"resonant":[86],"characteristics.":[87],"Maximum":[88],"likelihood":[89],"(ML)":[90],"solutions":[95],"on":[100],"data":[102],"are":[103],"found":[104],"iterative":[107],"algorithm.":[108],"comparison":[110],"between":[111],"ML-MoG":[112],"VB-MoG":[114,133],"carried":[118],"out":[119],"distortion":[122],"measures":[123],"mean":[125],"opinion":[126],"scores":[127],"(MOS).":[128],"The":[129],"main":[130],"advantages":[131],"highlighted":[134],"include":[138],"better":[139,149],"fewer":[142],"resulting":[147],"correspondence":[150],"formant-like":[154],"peaks,":[155],"measure":[159],"number":[162],"required":[165],"best":[167],"fit":[168],"envelope.":[171]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
