{"id":"https://openalex.org/W4402112072","doi":"https://doi.org/10.21437/interspeech.2024-969","title":"An Initial Investigation of Language Adaptation for TTS Systems under Low-resource Scenarios","display_name":"An Initial Investigation of Language Adaptation for TTS Systems under Low-resource Scenarios","publication_year":2024,"publication_date":"2024-09-01","ids":{"openalex":"https://openalex.org/W4402112072","doi":"https://doi.org/10.21437/interspeech.2024-969"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2024-969","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2024-969","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/12d8b30c-73a3-426f-8d0d-468e6f859772","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107748760","display_name":"Gong Cheng","orcid":"https://orcid.org/0009-0004-0272-3541"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cheng Gong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082349516","display_name":"Erica Cooper","orcid":"https://orcid.org/0000-0002-2978-2793"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Erica Cooper","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115591433","display_name":"Xin Wang","orcid":"https://orcid.org/0009-0004-2370-0779"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028353824","display_name":"Chunyu Qiang","orcid":"https://orcid.org/0009-0007-2290-3074"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chunyu Qiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003895235","display_name":"Mengzhe Geng","orcid":"https://orcid.org/0000-0002-7886-439X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mengzhe Geng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060652752","display_name":"Dan Wells","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dan Wells","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101745213","display_name":"Longbiao Wang","orcid":"https://orcid.org/0000-0002-8094-6861"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Longbiao Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101515499","display_name":"Jianwu Dang","orcid":"https://orcid.org/0000-0003-4504-5758"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianwu Dang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108855147","display_name":"M. Tessier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marc Tessier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028411499","display_name":"Aidan Pine","orcid":"https://orcid.org/0000-0001-6552-5324"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aidan Pine","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055415493","display_name":"Korin Richmond","orcid":"https://orcid.org/0000-0003-1450-8270"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Korin Richmond","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5107748760"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4576,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62762389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4963","last_page":"4967"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8701000213623047,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8701000213623047,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.840499997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.736299991607666,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.7488293647766113},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7388373017311096},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.5258808135986328},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.07653927803039551},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.06603631377220154}],"concepts":[{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.7488293647766113},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7388373017311096},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.5258808135986328},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.07653927803039551},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.06603631377220154},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2024-969","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2024-969","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2024","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/12d8b30c-73a3-426f-8d0d-468e6f859772","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/12d8b30c-73a3-426f-8d0d-468e6f859772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Gong, C, Cooper, E, Wang, X, Qiang, C, Geng, M, Wells, D, Wang, L, Dang, J, Tessier, M, Pine, A, Richmond, K & Yamagishi, J 2024, An initial investigation of language adaptation for TTS systems under low-resource scenarios. in Interspeech 2024. Interspeech, pp. 1-5, The 25th Interspeech Conference, Kos Island, Greece, 1/09/24. https://doi.org/10.21437/Interspeech.2024-969","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/12d8b30c-73a3-426f-8d0d-468e6f859772","is_oa":true,"landing_page_url":"https://hdl.handle.net/20.500.11820/12d8b30c-73a3-426f-8d0d-468e6f859772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Gong, C, Cooper, E, Wang, X, Qiang, C, Geng, M, Wells, D, Wang, L, Dang, J, Tessier, M, Pine, A, Richmond, K & Yamagishi, J 2024, An initial investigation of language adaptation for TTS systems under low-resource scenarios. in Interspeech 2024. Interspeech, pp. 1-5, The 25th Interspeech Conference, Kos Island, Greece, 1/09/24. https://doi.org/10.21437/Interspeech.2024-969","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/12d8b30c-73a3-426f-8d0d-468e6f859772","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/12d8b30c-73a3-426f-8d0d-468e6f859772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Gong, C, Cooper, E, Wang, X, Qiang, C, Geng, M, Wells, D, Wang, L, Dang, J, Tessier, M, Pine, A, Richmond, K & Yamagishi, J 2024, An initial investigation of language adaptation for TTS systems under low-resource scenarios. in Interspeech 2024. Interspeech, pp. 1-5, The 25th Interspeech Conference, Kos Island, Greece, 1/09/24. https://doi.org/10.21437/Interspeech.2024-969","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1],"(SSL)":[2],"representations":[3],"from":[4],"massively":[5],"multilingual":[6,40],"models":[7],"offer":[8],"a":[9,37],"promising":[10],"solution":[11],"for":[12,108],"low-resource":[13],"language":[14,19,32,78,126],"speech":[15,119],"tasks.":[16],"Despite":[17],"advancements,":[18],"adaptation":[20,33,84],"in":[21,44,66],"TTS":[22,41],"systems":[23],"remains":[24],"an":[25],"open":[26],"problem.":[27],"This":[28],"paper":[29],"explores":[30],"the":[31,64,69,77,81,90],"capability":[34],"of":[35,96],"ZMM-TTS,":[36],"recent":[38],"SSL-based":[39],"system":[42],"proposed":[43],"our":[45,121],"previous":[46],"work.":[47],"We":[48,61],"conducted":[49],"experiments":[50],"on":[51],"12":[52],"languages":[53],"using":[54,105],"limited":[55],"data":[56,107],"with":[57],"various":[58],"fine-tuning":[59,91,109],"configurations.":[60],"demonstrate":[62],"that":[63,89,104],"similarity":[65],"phonetics":[67],"between":[68],"pretraining":[70],"and":[71,94,128],"target":[72,82],"languages,":[73],"as":[74,76],"well":[75],"category,":[79],"affects":[80],"language\u2019s":[83],"performance.":[85],"Additionally,":[86],"we":[87,101],"find":[88],"dataset":[92],"size":[93],"number":[95],"speakers":[97],"influence":[98],"adaptability.":[99],"Surprisingly,":[100],"also":[102],"observed":[103],"paired":[106],"is":[110],"not":[111],"always":[112],"optimal":[113],"compared":[114],"to":[115],"audio-only":[116],"data.":[117],"Beyond":[118],"intelligibility,":[120],"analysis":[122],"covers":[123],"speaker":[124],"similarity,":[125],"identification,":[127],"predicted":[129],"MOS.":[130]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-05-10T08:33:47.465468","created_date":"2025-10-10T00:00:00"}
