{"id":"https://openalex.org/W2972473628","doi":"https://doi.org/10.21437/interspeech.2019-2668","title":"Learning to Speak Fluently in a Foreign Language: Multilingual Speech Synthesis and Cross-Language Voice Cloning","display_name":"Learning to Speak Fluently in a Foreign Language: Multilingual Speech Synthesis and Cross-Language Voice Cloning","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972473628","doi":"https://doi.org/10.21437/interspeech.2019-2668","mag":"2972473628"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-2668","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100433648","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0002-9505-1833"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yu Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103273436","display_name":"Ron J. Weiss","orcid":"https://orcid.org/0000-0003-2010-4053"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ron J. Weiss","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003420204","display_name":"Heiga Zen","orcid":"https://orcid.org/0000-0002-8959-5471"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heiga Zen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010253402","display_name":"Yonghui Wu","orcid":"https://orcid.org/0000-0002-6780-6135"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yonghui Wu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100715424","display_name":"Zhifeng Chen","orcid":"https://orcid.org/0000-0001-8631-2424"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhifeng Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066591543","display_name":"RJ Skerry-Ryan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"R.J. Skerry-Ryan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102792436","display_name":"Jia Ye","orcid":"https://orcid.org/0000-0002-8000-4911"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye Jia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102902866","display_name":"Andrew Rosenberg","orcid":"https://orcid.org/0000-0003-1780-4390"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andrew Rosenberg","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5071715737","display_name":"Bhuvana Ramabhadran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhuvana Ramabhadran","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100433648"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":14.7047,"has_fulltext":false,"cited_by_count":158,"citation_normalized_percentile":{"value":0.99136621,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2080","last_page":"2084"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.960099995136261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9581000208854675,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.661013126373291},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5115073919296265},{"id":"https://openalex.org/keywords/cloning","display_name":"Cloning (programming)","score":0.46162062883377075},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4281453788280487},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42219868302345276},{"id":"https://openalex.org/keywords/foreign-language","display_name":"Foreign language","score":0.415443480014801},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35765209794044495},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14406907558441162}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.661013126373291},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5115073919296265},{"id":"https://openalex.org/C121050878","wikidata":"https://www.wikidata.org/wiki/Q5135020","display_name":"Cloning (programming)","level":2,"score":0.46162062883377075},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4281453788280487},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42219868302345276},{"id":"https://openalex.org/C114010052","wikidata":"https://www.wikidata.org/wiki/Q150352","display_name":"Foreign language","level":2,"score":0.415443480014801},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35765209794044495},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14406907558441162},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2019-2668","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1544828620","https://openalex.org/W1731081199","https://openalex.org/W1959608418","https://openalex.org/W2024678752","https://openalex.org/W2025638820","https://openalex.org/W2471520273","https://openalex.org/W2494654097","https://openalex.org/W2519091744","https://openalex.org/W2604184139","https://openalex.org/W2788357188","https://openalex.org/W2794235490","https://openalex.org/W2794490148","https://openalex.org/W2808706139","https://openalex.org/W2884607399","https://openalex.org/W2892620417","https://openalex.org/W2901389167","https://openalex.org/W2901997113","https://openalex.org/W2907262790","https://openalex.org/W2962691331","https://openalex.org/W2963691546","https://openalex.org/W2963827314","https://openalex.org/W2963964591","https://openalex.org/W2964002616","https://openalex.org/W2964243274","https://openalex.org/W4289383906","https://openalex.org/W4298174729","https://openalex.org/W4298580827"],"related_works":["https://openalex.org/W2362782207","https://openalex.org/W2030283002","https://openalex.org/W1987653914","https://openalex.org/W2994004883","https://openalex.org/W2388174443","https://openalex.org/W2045225859","https://openalex.org/W2089054857","https://openalex.org/W1576765900","https://openalex.org/W4251499038","https://openalex.org/W2044609815"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,65,134],"multispeaker,":[3],"multilingual":[4],"text-to-speech":[5],"(TTS)":[6],"synthesis":[7],"model":[8,24,73,87,112,135],"based":[9],"on":[10,43,115],"Tacotron":[11],"that":[12],"is":[13,25,96],"able":[14,26],"to":[15,27,58,69,84,88,126,140],"produce":[16],"high":[17],"quality":[18],"speech":[19,35,107,144],"in":[20,101,133,148,155],"multiple":[21,116],"languages.Moreover,":[22],"the":[23,86,102,106,111],"transfer":[28,49],"voices":[29],"across":[30,51,75],"languages,":[31,54,76],"e.g.synthesize":[32],"fluent":[33],"Spanish":[34],"using":[36,64],"an":[37,80,123],"English":[38],"speaker's":[39],"voice,":[40],"without":[41],"training":[42,103,114,146],"any":[44],"bilingual":[45],"or":[46,157],"parallel":[47],"examples.Such":[48],"works":[50],"distantly":[52],"related":[53],"e.g.English":[55],"and":[56,77,121,154],"Mandarin.Critical":[57],"achieving":[59],"this":[60],"result":[61],"are:":[62],"1.":[63],"phonemic":[66],"input":[67,125],"representation":[68,91],"encourage":[70,85],"sharing":[71],"of":[72,92,118],"capacity":[74],"2.":[78],"incorporating":[79,122],"adversarial":[81],"loss":[82],"term":[83],"disentangle":[89],"its":[90],"speaker":[93],"identity":[94],"(which":[95],"perfectly":[97],"correlated":[98],"with":[99],"language":[100],"data)":[104],"from":[105],"content.Further":[108],"scaling":[109],"up":[110],"by":[113],"speakers":[117,147],"each":[119],"language,":[120],"autoencoding":[124],"help":[127],"stabilize":[128],"attention":[129],"during":[130,152],"training,":[131,153],"results":[132],"which":[136],"can":[137],"be":[138],"used":[139],"consistently":[141],"synthesize":[142],"intelligible":[143],"for":[145],"all":[149],"languages":[150],"seen":[151],"native":[156],"foreign":[158],"accents.":[159]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":25},{"year":2022,"cited_by_count":31},{"year":2021,"cited_by_count":41},{"year":2020,"cited_by_count":28}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
