{"id":"https://openalex.org/W4375869018","doi":"https://doi.org/10.1109/icassp49357.2023.10095640","title":"VF-Taco2: Towards Fast and Lightweight Synthesis for Autoregressive Models with Variation Autoencoder and Feature Distillation","display_name":"VF-Taco2: Towards Fast and Lightweight Synthesis for Autoregressive Models with Variation Autoencoder and Feature Distillation","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869018","doi":"https://doi.org/10.1109/icassp49357.2023.10095640"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095640","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095640","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100366624","display_name":"Yuhao Liu","orcid":"https://orcid.org/0000-0001-9636-747X"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuhao Liu","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107748760","display_name":"Gong Cheng","orcid":"https://orcid.org/0009-0004-0272-3541"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Gong","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050763764","display_name":"Longbiao Wang","orcid":"https://orcid.org/0000-0002-4005-5036"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longbiao Wang","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025119324","display_name":"Xixin Wu","orcid":"https://orcid.org/0000-0001-9543-1572"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xixin Wu","raw_affiliation_strings":["The Chinese University of Hong Kong,HK SAR,China","The Chinese University of Hong Kong, HK SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,HK SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, HK SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007199277","display_name":"Qiuyu Liu","orcid":"https://orcid.org/0000-0003-1645-3365"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiuyu Liu","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017251198","display_name":"Jianwu Dang","orcid":"https://orcid.org/0000-0002-9237-4821"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwu Dang","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100366624"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04148656,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"abs/2204.11806","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.8661733865737915},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.8554182052612305},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7482761144638062},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5359027981758118},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5245136022567749},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.49279335141181946},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.47034743428230286},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.45801180601119995},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.43935665488243103},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.43280842900276184},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4021705985069275},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3922758996486664},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3888625502586365},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3622013032436371},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10000556707382202}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.8661733865737915},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.8554182052612305},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7482761144638062},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5359027981758118},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5245136022567749},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.49279335141181946},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.47034743428230286},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.45801180601119995},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.43935665488243103},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.43280842900276184},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4021705985069275},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3922758996486664},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3888625502586365},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3622013032436371},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10000556707382202},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095640","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095640","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1821462560","https://openalex.org/W1902934009","https://openalex.org/W1959608418","https://openalex.org/W2747329762","https://openalex.org/W2904459034","https://openalex.org/W2962691331","https://openalex.org/W2963091184","https://openalex.org/W2963363373","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2999160446","https://openalex.org/W3010916717","https://openalex.org/W3015440759","https://openalex.org/W3160289600","https://openalex.org/W3196467321","https://openalex.org/W4281702809","https://openalex.org/W4285215423","https://openalex.org/W4385569627","https://openalex.org/W6638523607","https://openalex.org/W6639703010","https://openalex.org/W6640963894","https://openalex.org/W6763832098","https://openalex.org/W6773094808","https://openalex.org/W6793584814","https://openalex.org/W6810378763","https://openalex.org/W6838673894"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W2145836866","https://openalex.org/W2803255133","https://openalex.org/W2909431601"],"abstract_inverted_index":{"With":[0],"the":[1,62,68,121,138],"development":[2],"of":[3,21,64,116],"deep":[4],"learning,":[5],"end-to-end":[6],"neural":[7],"text-to-speech":[8],"(TTS)":[9],"systems":[10,23],"have":[11],"achieved":[12],"significant":[13],"improvements":[14],"in":[15,29,67],"high-quality":[16],"speech":[17,56,117,145],"synthesis.":[18],"However,":[19],"most":[20],"these":[22],"are":[24,140],"attention-based":[25],"autoregressive":[26,70],"models,":[27],"resulting":[28],"slow":[30],"synthesis":[31],"speed":[32],"and":[33,46,72,137],"large":[34,105],"model":[35,71,107],"parameter":[36],"sizes.":[37],"In":[38],"this":[39],"paper,":[40],"we":[41],"propose":[42],"a":[43,74,90,103,113,128],"new":[44],"fast":[45],"lightweight":[47],"TTS":[48],"framework":[49],"named":[50],"VF-Taco2,":[51],"which":[52],"can":[53],"quickly":[54],"synthesize":[55],"without":[57],"GPUs.":[58],"We":[59],"first":[60],"profiled":[61],"complexity":[63],"decoder":[65],"process":[66],"current":[69],"designed":[73],"novel":[75],"multiple":[76],"frames":[77],"prediction":[78],"module":[79],"based":[80],"on":[81,133],"variational":[82],"autoencoder":[83],"(VAE)":[84],"to":[85,101,108,120],"alleviate":[86],"quality":[87,146],"degradation":[88],"when":[89],"larger":[91],"\"reduction":[92],"factor\"":[93],"is":[94,99],"applied.":[95],"Besides,":[96],"feature":[97],"distillation":[98],"leveraged":[100],"compress":[102],"relatively":[104],"proposed":[106],"its":[109],"small":[110],"version":[111],"with":[112,144],"minor":[114],"loss":[115],"quality.":[118],"Compared":[119],"original":[122],"Tacotron":[123],"2,":[124],"our":[125],"VF-Taco2":[126],"achieves":[127],"3.6x-4.4x":[129],"Mel-spectrum":[130],"generation":[131],"acceleration":[132],"different":[134],"performance":[135],"CPUs,":[136],"parameters":[139],"compressed":[141],"by":[142],"1.5x":[143],"maintained.":[147]},"counts_by_year":[],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
