{"id":"https://openalex.org/W2963609956","doi":"https://doi.org/10.21437/interspeech.2017-1452","title":"Tacotron: Towards End-to-End Speech Synthesis","display_name":"Tacotron: Towards End-to-End Speech Synthesis","publication_year":2017,"publication_date":"2017-08-16","ids":{"openalex":"https://openalex.org/W2963609956","doi":"https://doi.org/10.21437/interspeech.2017-1452","mag":"2963609956"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2017-1452","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-1452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103854502","display_name":"Yuxuan Wang","orcid":"https://orcid.org/0000-0001-8269-3354"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuxuan Wang","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066591543","display_name":"RJ Skerry-Ryan","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"R.J. Skerry-Ryan","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024684905","display_name":"Daisy Stanton","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daisy Stanton","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010253402","display_name":"Yonghui Wu","orcid":"https://orcid.org/0000-0002-6780-6135"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yonghui Wu","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103273436","display_name":"Ron J. Weiss","orcid":"https://orcid.org/0000-0003-2010-4053"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ron J. Weiss","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112445699","display_name":"Navdeep Jaitly","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Navdeep Jaitly","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103011963","display_name":"Zongheng Yang","orcid":"https://orcid.org/0000-0001-8716-8743"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zongheng Yang","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101783515","display_name":"Ying Xiao","orcid":"https://orcid.org/0000-0003-4316-4825"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ying Xiao","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100715424","display_name":"Zhifeng Chen","orcid":"https://orcid.org/0000-0001-8631-2424"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhifeng Chen","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017529415","display_name":"Samy Bengio","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samy Bengio","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084327952","display_name":"Quoc Khai Le","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Quoc Le","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024140675","display_name":"Yannis Agiomyrgiannakis","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yannis Agiomyrgiannakis","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052989202","display_name":"Rob Clark","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rob Clark","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026650852","display_name":"Rif A. Saurous","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rif A. Saurous","raw_affiliation_strings":["Google, Inc"],"affiliations":[{"raw_affiliation_string":"Google, Inc","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5103854502"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":107.4661,"has_fulltext":false,"cited_by_count":1702,"citation_normalized_percentile":{"value":0.99953056,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4006","last_page":"4010"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9682999849319458,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9670000076293945,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.8205375671386719},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5275741815567017},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4875344932079315},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3633856773376465},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13092485070228577}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.8205375671386719},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5275741815567017},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4875344932079315},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3633856773376465},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13092485070228577}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2017-1452","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-1452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W648786980","https://openalex.org/W1522301498","https://openalex.org/W1563460361","https://openalex.org/W1836465849","https://openalex.org/W1869752048","https://openalex.org/W1924770834","https://openalex.org/W2099057450","https://openalex.org/W2120847449","https://openalex.org/W2129142580","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2194775991","https://openalex.org/W2271840356","https://openalex.org/W2327501763","https://openalex.org/W2507771204","https://openalex.org/W2515943672","https://openalex.org/W2519091744","https://openalex.org/W2525778437","https://openalex.org/W2531207078","https://openalex.org/W2584032004","https://openalex.org/W2591927543","https://openalex.org/W2901997113","https://openalex.org/W4295276571","https://openalex.org/W4298261015","https://openalex.org/W4394643672"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3179968364","https://openalex.org/W2390279801","https://openalex.org/W3196421258","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2151749779","https://openalex.org/W2382290278","https://openalex.org/W2938107654"],"abstract_inverted_index":{"A":[0],"text-to-speech":[1,44],"synthesis":[2,21],"system":[3,96],"typically":[4],"consists":[5],"of":[6,99],"multiple":[7],"stages,":[8],"such":[9],"as":[10],"a":[11,82,93],"text":[12],"analysis":[13],"frontend,":[14],"an":[15,19,41],"acoustic":[16],"model":[17,45,56],"and":[18,30],"audio":[20],"module.Building":[22],"these":[23],"components":[24],"often":[25],"requires":[26],"extensive":[27],"domain":[28],"expertise":[29],"may":[31],"contain":[32],"brittle":[33],"design":[34],"choices.In":[35],"this":[36,78],"paper,":[37],"we":[38],"present":[39,66],"Tacotron,":[40],"end-to-end":[42],"generative":[43],"that":[46],"synthesizes":[47],"speech":[48,105],"directly":[49],"from":[50,61],"characters.Given":[51],"<text,":[52],"audio>":[53],"pairs,":[54],"the":[55,72,107],"can":[57],"be":[58],"trained":[59],"completely":[60],"scratch":[62],"with":[63],"random":[64],"initialization.We":[65],"several":[67],"key":[68],"techniques":[69],"to":[70],"make":[71],"sequence-tosequence":[73],"framework":[74],"perform":[75],"well":[76],"for":[77],"challenging":[79],"task.Tacotron":[80],"achieves":[81],"3.82":[83],"subjective":[84],"5-scale":[85],"mean":[86],"opinion":[87],"score":[88],"on":[89],"US":[90],"English,":[91],"outperforming":[92],"production":[94],"parametric":[95],"in":[97],"terms":[98],"naturalness.In":[100],"addition,":[101],"since":[102],"Tacotron":[103],"generates":[104],"at":[106],"frame":[108],"level,":[109],"it's":[110],"substantially":[111],"faster":[112],"than":[113],"sample-level":[114],"autoregressive":[115],"methods.":[116]},"counts_by_year":[{"year":2026,"cited_by_count":72},{"year":2025,"cited_by_count":176},{"year":2024,"cited_by_count":172},{"year":2023,"cited_by_count":210},{"year":2022,"cited_by_count":241},{"year":2021,"cited_by_count":313},{"year":2020,"cited_by_count":261},{"year":2019,"cited_by_count":191},{"year":2018,"cited_by_count":54},{"year":2017,"cited_by_count":11},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
