{"id":"https://openalex.org/W2255418810","doi":"https://doi.org/10.1109/lsp.2016.2516032","title":"DBN-based Spectral Feature Representation for Statistical Parametric Speech Synthesis","display_name":"DBN-based Spectral Feature Representation for Statistical Parametric Speech Synthesis","publication_year":2016,"publication_date":"2016-01-08","ids":{"openalex":"https://openalex.org/W2255418810","doi":"https://doi.org/10.1109/lsp.2016.2516032","mag":"2255418810"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2016.2516032","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2016.2516032","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059516164","display_name":"Ya-Jun Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ya-Jun Hu","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen-Hua Ling","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5059516164"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":4.2847,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.94655772,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"23","issue":"3","first_page":"321","last_page":"325"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7650567293167114},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.665216863155365},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.6404713988304138},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6181714534759521},{"id":"https://openalex.org/keywords/deep-belief-network","display_name":"Deep belief network","score":0.6081750392913818},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5972135663032532},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.5912137031555176},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5803636312484741},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5197505354881287},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.49915504455566406},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.49392539262771606},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.4434944987297058},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43504413962364197},{"id":"https://openalex.org/keywords/spectral-envelope","display_name":"Spectral envelope","score":0.4186147451400757},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3951989412307739},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22385355830192566},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09041577577590942},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07903268933296204}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7650567293167114},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.665216863155365},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.6404713988304138},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6181714534759521},{"id":"https://openalex.org/C97385483","wikidata":"https://www.wikidata.org/wiki/Q16954980","display_name":"Deep belief network","level":3,"score":0.6081750392913818},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5972135663032532},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.5912137031555176},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5803636312484741},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5197505354881287},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.49915504455566406},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.49392539262771606},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.4434944987297058},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43504413962364197},{"id":"https://openalex.org/C54926389","wikidata":"https://www.wikidata.org/wiki/Q7575188","display_name":"Spectral envelope","level":2,"score":0.4186147451400757},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3951989412307739},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22385355830192566},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09041577577590942},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07903268933296204},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2016.2516032","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2016.2516032","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2516691852","display_name":null,"funder_award_id":"61273032","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W44815768","https://openalex.org/W1569145343","https://openalex.org/W1600722501","https://openalex.org/W1813659000","https://openalex.org/W1983334819","https://openalex.org/W2020024436","https://openalex.org/W2042691334","https://openalex.org/W2045158511","https://openalex.org/W2049036695","https://openalex.org/W2049686551","https://openalex.org/W2086796102","https://openalex.org/W2097117768","https://openalex.org/W2102003408","https://openalex.org/W2106554350","https://openalex.org/W2108674328","https://openalex.org/W2129142580","https://openalex.org/W2136163184","https://openalex.org/W2136922672","https://openalex.org/W2137782235","https://openalex.org/W2154920538","https://openalex.org/W2160815625","https://openalex.org/W2163922914","https://openalex.org/W2166823384","https://openalex.org/W2184045248","https://openalex.org/W2292984643","https://openalex.org/W6601785968","https://openalex.org/W6638304892","https://openalex.org/W6676044216"],"related_works":["https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W2137890270","https://openalex.org/W2041712606","https://openalex.org/W1969924723","https://openalex.org/W1997460882","https://openalex.org/W2108985546","https://openalex.org/W2296454507","https://openalex.org/W2433276473","https://openalex.org/W4297776503"],"abstract_inverted_index":{"This":[0],"letter":[1],"presents":[2],"a":[3,10,26],"method":[4,80,88],"of":[5],"deriving":[6],"spectral":[7,34,51,59,92],"features":[8,52,93],"using":[9,89],"deep":[11],"belief":[12],"network":[13],"(DBN)":[14],"for":[15,53,71],"hidden":[16],"Markov":[17],"model":[18],"(HMM)-based":[19],"parametric":[20],"speech":[21],"synthesis.":[22],"At":[23,56],"training":[24],"time,":[25,58],"DBN":[27],"is":[28],"estimated":[29],"to":[30],"represent":[31],"the":[32,64,86],"high-dimensional":[33],"envelopes":[35,60],"and":[36,68,94],"then":[37,69],"transforms":[38],"them":[39],"into":[40],"binary":[41,45],"codes.":[42],"These":[43],"DBN-based":[44],"codes":[46],"(DBCs)":[47],"are":[48,61],"used":[49,70],"as":[50,91],"HMM":[54],"modeling.":[55],"synthesis":[57],"recovered":[62],"from":[63],"predicted":[65],"DBC":[66],"sequences":[67],"waveform":[72],"reconstruction.":[73],"Experimental":[74],"results":[75],"show":[76],"that":[77],"our":[78],"proposed":[79],"can":[81],"achieve":[82],"better":[83],"naturalness":[84],"than":[85],"conventional":[87],"mel-cepstra":[90],"considering":[95],"global":[96],"variance":[97],"(GV)":[98],"during":[99],"parameter":[100],"generation.":[101]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
