{"id":"https://openalex.org/W2137782235","doi":"https://doi.org/10.1109/icassp.2013.6639187","title":"Modeling spectral envelopes using restricted Boltzmann machines for statistical parametric speech synthesis","display_name":"Modeling spectral envelopes using restricted Boltzmann machines for statistical parametric speech synthesis","publication_year":2013,"publication_date":"2013-05-01","ids":{"openalex":"https://openalex.org/W2137782235","doi":"https://doi.org/10.1109/icassp.2013.6639187","mag":"2137782235"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2013.6639187","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2013.6639187","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE International Conference on Acoustics, Speech and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhen-Hua Ling","raw_affiliation_strings":["University of Science and Technology of China, Hefei, Anhui, CN","Nat. Eng. Lab. of Speech & Language Inf. Process., Univ. of Sci. & Technol. of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, Anhui, CN","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"Nat. Eng. Lab. of Speech & Language Inf. Process., Univ. of Sci. & Technol. of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100671324","display_name":"Li Deng","orcid":"https://orcid.org/0000-0002-1014-0790"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Li Deng","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA","[Microsoft Research,Redmond,WA,USA]"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"[Microsoft Research,Redmond,WA,USA]","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA","[Microsoft Research,Redmond,WA,USA]"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"[Microsoft Research,Redmond,WA,USA]","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5059767940"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":11.5415,"has_fulltext":false,"cited_by_count":49,"citation_normalized_percentile":{"value":0.98383588,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"7825","last_page":"7829"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7975733280181885},{"id":"https://openalex.org/keywords/spectral-envelope","display_name":"Spectral envelope","score":0.7423256039619446},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.6643974184989929},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.6569098234176636},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6033331155776978},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.558545708656311},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5258477330207825},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.4876788854598999},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.45324671268463135},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.421114444732666},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4198456406593323},{"id":"https://openalex.org/keywords/boltzmann-machine","display_name":"Boltzmann machine","score":0.4150159955024719},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3311983346939087},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.31846633553504944},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.22676706314086914},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.0875348150730133},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08379894495010376}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7975733280181885},{"id":"https://openalex.org/C54926389","wikidata":"https://www.wikidata.org/wiki/Q7575188","display_name":"Spectral envelope","level":2,"score":0.7423256039619446},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.6643974184989929},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.6569098234176636},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6033331155776978},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.558545708656311},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5258477330207825},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.4876788854598999},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.45324671268463135},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.421114444732666},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4198456406593323},{"id":"https://openalex.org/C192576344","wikidata":"https://www.wikidata.org/wiki/Q194706","display_name":"Boltzmann machine","level":3,"score":0.4150159955024719},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3311983346939087},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.31846633553504944},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.22676706314086914},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0875348150730133},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08379894495010376},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2013.6639187","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2013.6639187","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE International Conference on Acoustics, Speech and Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5299999713897705,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1600722501","https://openalex.org/W1813659000","https://openalex.org/W1993882792","https://openalex.org/W2042691334","https://openalex.org/W2049686551","https://openalex.org/W2100495367","https://openalex.org/W2108674328","https://openalex.org/W2116064496","https://openalex.org/W2136922672","https://openalex.org/W2147768505","https://openalex.org/W2154920538","https://openalex.org/W2160815625","https://openalex.org/W2164700406","https://openalex.org/W2168013545","https://openalex.org/W2184045248","https://openalex.org/W2596135543","https://openalex.org/W6638304892","https://openalex.org/W6676044216","https://openalex.org/W6735255722"],"related_works":["https://openalex.org/W4391272374","https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W40885451","https://openalex.org/W2108985546","https://openalex.org/W2081919107","https://openalex.org/W2433276473","https://openalex.org/W2535215250","https://openalex.org/W1537411440","https://openalex.org/W2024201202"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,162],"new":[4,46],"spectral":[5,22,29,58,78,101,107,148],"modeling":[6,92],"method":[7,47,166],"for":[8,36],"statistical":[9],"parametric":[10,42],"speech":[11,43,176],"synthesis.":[12],"In":[13],"contrast":[14],"to":[15,72,110,142],"the":[16,34,50,53,74,77,93,106,111,121,126,138,144,147,157,170,173],"conventional":[17,174],"methods":[18],"in":[19,91,125],"which":[20],"high-level":[21],"parameters,":[23],"such":[24],"as":[25,33,120],"mel-cepstra":[26],"or":[27,56],"line":[28],"pairs,":[30],"are":[31,103],"adopted":[32],"features":[35],"hidden":[37],"Markov":[38],"model":[39,143],"(HMM)":[40],"based":[41],"synthesis,":[44],"our":[45,164],"directly":[48],"models":[49],"distribution":[51,75,95,145],"of":[52,61,76,96,114,146,172],"lower-level,":[54],"un-transformed":[55],"raw":[57],"envelopes.":[59],"Instead":[60],"using":[62,179],"single":[63],"Gaussian":[64,122,158],"distributions,":[65],"we":[66],"adopt":[67],"restricted":[68],"Boltzmann":[69],"machines":[70],"(RBM)":[71],"represent":[73],"envelopes":[79,149],"at":[80,130],"each":[81,115],"HMM":[82],"state.":[83],"We":[84],"anticipate":[85],"these":[86],"will":[87],"give":[88],"superior":[89],"performance":[90],"joint":[94],"high-dimensional":[97],"stochastic":[98],"vectors.":[99],"The":[100],"parameters":[102],"derived":[104],"from":[105],"envelope":[108],"corresponding":[109],"estimated":[112],"mode":[113],"context-dependent":[116],"RBM":[117,139],"and":[118,153],"act":[119],"mean":[123],"vector":[124],"parameter":[127],"generation":[128],"procedure":[129],"synthesis":[131,177],"time.":[132],"Our":[133],"experimental":[134],"results":[135],"show":[136],"that":[137],"is":[140],"able":[141],"with":[150],"better":[151],"accuracy":[152],"generalization":[154],"ability":[155],"than":[156],"mixture":[159],"model.":[160],"As":[161],"result,":[163],"proposed":[165],"can":[167],"significantly":[168],"improve":[169],"naturalness":[171],"HMM-based":[175],"system":[178],"mel-cepstra.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":10},{"year":2013,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
