{"id":"https://openalex.org/W3161296985","doi":"https://doi.org/10.1109/icassp39728.2021.9414718","title":"Parallel Tacotron: Non-Autoregressive and Controllable TTS","display_name":"Parallel Tacotron: Non-Autoregressive and Controllable TTS","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3161296985","doi":"https://doi.org/10.1109/icassp39728.2021.9414718","mag":"3161296985"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9414718","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414718","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063926383","display_name":"Isaac Elias","orcid":"https://orcid.org/0000-0001-9035-5419"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Isaac Elias","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003420204","display_name":"Heiga Zen","orcid":"https://orcid.org/0000-0002-8959-5471"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heiga Zen","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021278619","display_name":"Jonathan Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan Shen","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100433648","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0002-9505-1833"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102792436","display_name":"Jia Ye","orcid":"https://orcid.org/0000-0002-8000-4911"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ye Jia","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103273436","display_name":"Ron J. Weiss","orcid":"https://orcid.org/0000-0003-2010-4053"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ron J. Weiss","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010253402","display_name":"Yonghui Wu","orcid":"https://orcid.org/0000-0002-6780-6135"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yonghui Wu","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5063926383"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":8.8383,"has_fulltext":false,"cited_by_count":71,"citation_normalized_percentile":{"value":0.98190704,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5709","last_page":"5713"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.9703305959701538},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.850720226764679},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.8310197591781616},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7439976930618286},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7352104187011719},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6647120714187622},{"id":"https://openalex.org/keywords/parallelizable-manifold","display_name":"Parallelizable manifold","score":0.6505258083343506},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6274921298027039},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46220919489860535},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42516443133354187},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.391803115606308},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.31554627418518066},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14980411529541016},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.051626056432724}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.9703305959701538},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.850720226764679},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.8310197591781616},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7439976930618286},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7352104187011719},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6647120714187622},{"id":"https://openalex.org/C148047603","wikidata":"https://www.wikidata.org/wiki/Q1014612","display_name":"Parallelizable manifold","level":2,"score":0.6505258083343506},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6274921298027039},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46220919489860535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42516443133354187},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.391803115606308},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31554627418518066},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14980411529541016},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.051626056432724},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9414718","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414718","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":78,"referenced_works":["https://openalex.org/W648786980","https://openalex.org/W1810943226","https://openalex.org/W2016589492","https://openalex.org/W2102003408","https://openalex.org/W2129142580","https://openalex.org/W2133564696","https://openalex.org/W2402737981","https://openalex.org/W2519091744","https://openalex.org/W2542835211","https://openalex.org/W2766812927","https://openalex.org/W2767206889","https://openalex.org/W2794490148","https://openalex.org/W2795109282","https://openalex.org/W2901997113","https://openalex.org/W2903739847","https://openalex.org/W2908336025","https://openalex.org/W2946200149","https://openalex.org/W2949382160","https://openalex.org/W2952809536","https://openalex.org/W2963272440","https://openalex.org/W2963403868","https://openalex.org/W2963434219","https://openalex.org/W2963568578","https://openalex.org/W2963609956","https://openalex.org/W2963623257","https://openalex.org/W2963927338","https://openalex.org/W2964243274","https://openalex.org/W2964307104","https://openalex.org/W2964308564","https://openalex.org/W2969521066","https://openalex.org/W2970730223","https://openalex.org/W2971753973","https://openalex.org/W2972473628","https://openalex.org/W2972702018","https://openalex.org/W2972951102","https://openalex.org/W2996843693","https://openalex.org/W3015282541","https://openalex.org/W3015922793","https://openalex.org/W3015960524","https://openalex.org/W3016021263","https://openalex.org/W3016136182","https://openalex.org/W3025013833","https://openalex.org/W3025793647","https://openalex.org/W3033411150","https://openalex.org/W3033913438","https://openalex.org/W3034949308","https://openalex.org/W3037932933","https://openalex.org/W3091928890","https://openalex.org/W3096442195","https://openalex.org/W3130016944","https://openalex.org/W3150572638","https://openalex.org/W4287761884","https://openalex.org/W4289383906","https://openalex.org/W4294149591","https://openalex.org/W4295731579","https://openalex.org/W4298580827","https://openalex.org/W4385245566","https://openalex.org/W6621543089","https://openalex.org/W6638273328","https://openalex.org/W6675380101","https://openalex.org/W6679434410","https://openalex.org/W6713623176","https://openalex.org/W6739901393","https://openalex.org/W6745697700","https://openalex.org/W6746208923","https://openalex.org/W6748409065","https://openalex.org/W6750489868","https://openalex.org/W6753441378","https://openalex.org/W6755300632","https://openalex.org/W6756197946","https://openalex.org/W6757585730","https://openalex.org/W6763832098","https://openalex.org/W6767453231","https://openalex.org/W6777340644","https://openalex.org/W6778823374","https://openalex.org/W6779337556","https://openalex.org/W6780226713","https://openalex.org/W6784545093"],"related_works":["https://openalex.org/W4285069850","https://openalex.org/W2891970004","https://openalex.org/W2029561777","https://openalex.org/W4240963716","https://openalex.org/W1554502231","https://openalex.org/W172797710","https://openalex.org/W1996916724","https://openalex.org/W1974634278","https://openalex.org/W1596637634","https://openalex.org/W3138614332"],"abstract_inverted_index":{"Although":[0],"neural":[1,26],"end-to-end":[2],"text-to-speech":[3,27,69],"models":[4],"can":[5,84],"synthesize":[6],"highly":[7,42],"natural":[8],"speech,":[9],"there":[10],"is":[11,41],"still":[12],"room":[13],"for":[14],"improvements":[15],"to":[16],"its":[17],"efficiency":[18],"and":[19,47,71,89],"naturalness.":[20,73],"This":[21,36],"paper":[22],"proposes":[23],"a":[24,31,106],"non-autoregressive":[25],"model":[28],"augmented":[29],"with":[30,113],"variational":[32,60],"autoencoder-based":[33],"residual":[34],"encoder.":[35],"model,":[37],"called":[38],"Parallel":[39,103],"Tacotron,":[40],"parallelizable":[43],"during":[44],"both":[45],"training":[46],"inference,":[48],"allowing":[49],"efficient":[50],"synthesis":[51],"on":[52],"modern":[53],"parallel":[54],"hardware.":[55],"The":[56],"use":[57,80],"of":[58,67],"the":[59,63,68,77],"autoencoder":[61],"relaxes":[62],"one-to-many":[64],"mapping":[65],"nature":[66],"problem":[70],"improves":[72],"To":[74],"further":[75],"improve":[76],"naturalness,":[78],"we":[79],"lightweight":[81],"convolutions,":[82],"which":[83],"efficiently":[85],"capture":[86],"local":[87],"contexts,":[88],"introduce":[90],"an":[91],"iterative":[92,97],"spectrogram":[93],"loss":[94],"inspired":[95],"by":[96],"refinement.":[98],"Experimental":[99],"results":[100],"show":[101],"that":[102],"Tacotron":[104],"matches":[105],"strong":[107],"autoregressive":[108],"baseline":[109],"in":[110],"subjective":[111],"evaluations":[112],"significantly":[114],"decreased":[115],"inference":[116],"time.":[117]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":23},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":11}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
