{"id":"https://openalex.org/W2973177731","doi":"https://doi.org/10.21437/interspeech.2019-3191","title":"Building a Mixed-Lingual Neural TTS System with Only Monolingual Data","display_name":"Building a Mixed-Lingual Neural TTS System with Only Monolingual Data","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2973177731","doi":"https://doi.org/10.21437/interspeech.2019-3191","mag":"2973177731"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-3191","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-3191","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009337933","display_name":"Liumeng Xue","orcid":"https://orcid.org/0000-0003-2815-8494"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liumeng Xue","raw_affiliation_strings":["Shaanxi Provincial Key Lab of Speech and Image Information Processing, School of Computer Science, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Shaanxi Provincial Key Lab of Speech and Image Information Processing, School of Computer Science, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100628753","display_name":"Wei Song","orcid":"https://orcid.org/0000-0002-3148-5827"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Song","raw_affiliation_strings":["JD.com"],"affiliations":[{"raw_affiliation_string":"JD.com","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101552980","display_name":"Guanghui Xu","orcid":"https://orcid.org/0000-0003-2910-9870"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanghui Xu","raw_affiliation_strings":["JD.com"],"affiliations":[{"raw_affiliation_string":"JD.com","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100668966","display_name":"Lei Xie","orcid":"https://orcid.org/0000-0001-8234-0823"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Xie","raw_affiliation_strings":["Shaanxi Provincial Key Lab of Speech and Image Information Processing, School of Computer Science, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Shaanxi Provincial Key Lab of Speech and Image Information Processing, School of Computer Science, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102765381","display_name":"Zhizheng Wu","orcid":"https://orcid.org/0009-0001-1192-9857"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhizheng Wu","raw_affiliation_strings":["JD.com"],"affiliations":[{"raw_affiliation_string":"JD.com","institution_ids":["https://openalex.org/I4210103986"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5009337933"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":3.7805,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.94779554,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2060","last_page":"2064"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.9124136567115784},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7890767455101013},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.7541121244430542},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.7009965181350708},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.6605242490768433},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.632103443145752},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5439844131469727},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5132126808166504},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.49230849742889404},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4726019501686096},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.46303805708885193},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.23413586616516113}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.9124136567115784},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7890767455101013},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.7541121244430542},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.7009965181350708},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.6605242490768433},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.632103443145752},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5439844131469727},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5132126808166504},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.49230849742889404},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4726019501686096},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.46303805708885193},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.23413586616516113},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2019-3191","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-3191","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7799999713897705,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W88751538","https://openalex.org/W2078647797","https://openalex.org/W2114010348","https://openalex.org/W2120847449","https://openalex.org/W2133564696","https://openalex.org/W2146927751","https://openalex.org/W2187089797","https://openalex.org/W2395500689","https://openalex.org/W2398071208","https://openalex.org/W2398462115","https://openalex.org/W2525997790","https://openalex.org/W2527729766","https://openalex.org/W2554000662","https://openalex.org/W2612434969","https://openalex.org/W2747921929","https://openalex.org/W2766812927","https://openalex.org/W2788357188","https://openalex.org/W2808706139","https://openalex.org/W2963609956","https://openalex.org/W2964002616","https://openalex.org/W2964243274","https://openalex.org/W4295731579","https://openalex.org/W4298174729","https://openalex.org/W4298857617","https://openalex.org/W4390926441"],"related_works":["https://openalex.org/W2079655441","https://openalex.org/W2912293245","https://openalex.org/W4252942110","https://openalex.org/W1604114751","https://openalex.org/W2032941915","https://openalex.org/W2075706796","https://openalex.org/W4391272374","https://openalex.org/W4403759994","https://openalex.org/W4400309480","https://openalex.org/W2081919107"],"abstract_inverted_index":{"When":[0],"deploying":[1],"a":[2,37,116],"Chinese":[3,15],"neural":[4],"Text-to-Speech":[5],"(TTS)":[6],"system,":[7],"one":[8],"of":[9,78,102],"the":[10,26,29,44,57,76,100,108,112],"challenges":[11,113],"is":[12,40,65],"to":[13,114],"synthesize":[14],"utterances":[16],"with":[17,59,120],"English":[18,74],"phrases":[19],"or":[20],"words":[21],"embedded.This":[22],"paper":[23],"looks":[24],"into":[25,82],"problem":[27,45],"in":[28],"encoder-decoder":[30],"framework":[31],"when":[32],"only":[33,121],"monolingual":[34,69,122],"data":[35,103],"from":[36,46,67],"target":[38],"speaker":[39,49,83,86],"available.Specifically,":[41],"we":[42,80],"view":[43],"two":[47],"aspects:":[48],"consistency":[50,87],"within":[51,88],"an":[52,60,89],"utterance":[53,90],"and":[54,73,91,96,98,110],"naturalness.We":[55],"start":[56],"investigation":[58],"average":[61],"voice":[62],"model":[63,105],"which":[64],"built":[66],"multispeaker":[68],"data,":[70],"i.e.,":[71],"Mandarin":[72],"data.On":[75],"basis":[77],"that,":[79],"look":[81],"embedding":[84,93],"for":[85,94,104],"phoneme":[92],"naturalness":[95],"intelligibility,":[97],"study":[99],"choice":[101],"training.We":[106],"report":[107],"findings":[109],"discuss":[111],"build":[115],"mixed-lingual":[117],"TTS":[118],"system":[119],"data.":[123]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
