{"id":"https://openalex.org/W2940064443","doi":"https://doi.org/10.1109/icassp.2019.8682528","title":"Quasi-fully Convolutional Neural Network with Variational Inference for Speech Synthesis","display_name":"Quasi-fully Convolutional Neural Network with Variational Inference for Speech Synthesis","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2940064443","doi":"https://doi.org/10.1109/icassp.2019.8682528","mag":"2940064443"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8682528","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682528","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101730758","display_name":"Mu Wang","orcid":"https://orcid.org/0009-0000-3751-5495"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Mu Wang","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Science, Technologies and Systems, Graduate School at Shenzhen, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Science, Technologies and Systems, Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025119324","display_name":"Xixin Wu","orcid":"https://orcid.org/0000-0001-9543-1572"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xixin Wu","raw_affiliation_strings":["Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"Department of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100667025","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0002-6527-5502"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Science, Technologies and Systems, Graduate School at Shenzhen, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Science, Technologies and Systems, Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083463839","display_name":"Shiyin Kang","orcid":"https://orcid.org/0000-0001-8304-5260"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiyin Kang","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043757154","display_name":"Deyi Tuo","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Deyi Tuo","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062105912","display_name":"Guangzhi Li","orcid":"https://orcid.org/0009-0004-9197-7737"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangzhi Li","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075183307","display_name":"Dan Su","orcid":"https://orcid.org/0000-0001-5746-9545"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Su","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019458385","display_name":"Helen Meng","orcid":"https://orcid.org/0000-0002-4427-3532"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Helen Meng","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Science, Technologies and Systems, Graduate School at Shenzhen, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Science, Technologies and Systems, Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5101730758"],"corresponding_institution_ids":["https://openalex.org/I889458895","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.5601,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.74049204,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"7060","last_page":"7064"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8124427795410156},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7473787069320679},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6809875965118408},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.6599875092506409},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.573033332824707},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5478188991546631},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.5374932885169983},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5260089635848999},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4915212094783783},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4165676236152649},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3776162266731262},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.37508058547973633},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33321139216423035},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.18923431634902954}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8124427795410156},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7473787069320679},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6809875965118408},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.6599875092506409},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.573033332824707},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5478188991546631},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.5374932885169983},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5260089635848999},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4915212094783783},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4165676236152649},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3776162266731262},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.37508058547973633},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33321139216423035},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.18923431634902954},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2019.8682528","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682528","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320316083","display_name":"Tencent","ror":"https://ror.org/00hhjss72"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322392","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549"},{"id":"https://openalex.org/F4320333435","display_name":"Virginia Agricultural Experiment Station, Virginia Polytechnic Institute and State University","ror":null},{"id":"https://openalex.org/F4320335869","display_name":"National Social Science Fund of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1576227399","https://openalex.org/W1959608418","https://openalex.org/W2043003570","https://openalex.org/W2102003408","https://openalex.org/W2129142580","https://openalex.org/W2284050935","https://openalex.org/W2400063444","https://openalex.org/W2467604901","https://openalex.org/W2519091744","https://openalex.org/W2553397501","https://openalex.org/W2769810959","https://openalex.org/W2792764867","https://openalex.org/W2794490148","https://openalex.org/W2795109282","https://openalex.org/W2944438111","https://openalex.org/W2949382160","https://openalex.org/W2952436057","https://openalex.org/W2963272440","https://openalex.org/W2963609956","https://openalex.org/W2963685250","https://openalex.org/W2963782041","https://openalex.org/W2963927338","https://openalex.org/W2964121744","https://openalex.org/W2964243274","https://openalex.org/W2964307104","https://openalex.org/W4294619240","https://openalex.org/W4295731579","https://openalex.org/W4298580827","https://openalex.org/W6631190155","https://openalex.org/W6640963894","https://openalex.org/W6695676441","https://openalex.org/W6712610176","https://openalex.org/W6720208624","https://openalex.org/W6748409065","https://openalex.org/W6749825310","https://openalex.org/W6750489868","https://openalex.org/W6764398373"],"related_works":["https://openalex.org/W2164147372","https://openalex.org/W2550171623","https://openalex.org/W4253660971","https://openalex.org/W1909292483","https://openalex.org/W2890297197","https://openalex.org/W1428730622","https://openalex.org/W1658560081","https://openalex.org/W2146616055","https://openalex.org/W2899676847","https://openalex.org/W596245619"],"abstract_inverted_index":{"Recurrent":[0],"neural":[1,42,76],"networks,":[2],"such":[3,24],"as":[4],"gated":[5],"recurrent":[6],"units":[7],"(GRUs)":[8],"and":[9,136],"long":[10],"short-term":[11],"memory":[12],"(LSTM),":[13],"are":[14,28,99],"widely":[15],"used":[16,92],"on":[17,50,114],"acoustic":[18,63,83,134],"modeling":[19],"for":[20,53],"speech":[21,54,113],"synthesis.":[22,55],"However,":[23],"sequential":[25],"generating":[26],"processes":[27],"not":[29],"friendly":[30],"to":[31,79,93,147],"today\u2019s":[32],"massively":[33],"parallel":[34,51,88,156],"computing":[35],"devices.":[36],"We":[37,72],"introduce":[38],"a":[39,86,115],"fully":[40],"convolutional":[41],"network":[43],"(CNN)":[44],"model,":[45],"which":[46],"can":[47,109],"effiently":[48],"run":[49],"processers,":[52],"To":[56],"improve":[57],"the":[58,61,81,120,126,151,155],"quality":[59,128],"of":[60,122,132,154],"generated":[62,82,133],"features,":[64],"we":[65,102,143],"strengthen":[66],"our":[67],"model":[68,90],"with":[69,106,117],"variational":[70,107],"inference.":[71],"also":[73],"use":[74,121],"quasi-recurrent":[75],"networks":[77],"(QRNNs)":[78],"smoothen":[80],"features.":[84],"Finally,":[85],"high-quality":[87],"WaveNet":[89,157],"is":[91],"generate":[94,110],"audio":[95],"samples.":[96],"Our":[97],"contributions":[98],"twofold.":[100],"First,":[101],"show":[103,144],"that":[104],"CNNs":[105],"inference":[108],"highly":[111],"natural":[112],"par":[116],"end-to-end":[118],"models;":[119],"QRNNs":[123],"further":[124,148],"improves":[125],"synthetic":[127],"by":[129],"reducing":[130],"trembling":[131],"features":[135],"introduces":[137],"very":[138],"little":[139],"run-time":[140],"overheads.":[141],"Second,":[142],"some":[145],"techniques":[146],"speed":[149],"up":[150],"sampling":[152],"process":[153],"model.":[158]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
