{"id":"https://openalex.org/W4206596421","doi":"https://doi.org/10.1109/access.2022.3141200","title":"Transfer Learning, Style Control, and Speaker Reconstruction Loss for Zero-Shot Multilingual Multi-Speaker Text-to-Speech on Low-Resource Languages","display_name":"Transfer Learning, Style Control, and Speaker Reconstruction Loss for Zero-Shot Multilingual Multi-Speaker Text-to-Speech on Low-Resource Languages","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4206596421","doi":"https://doi.org/10.1109/access.2022.3141200"},"language":"en","primary_location":{"id":"doi:10.1109/access.2022.3141200","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3141200","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9668973/09673749.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/9668973/09673749.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066073372","display_name":"Kurniawati Azizah","orcid":"https://orcid.org/0000-0002-3217-7025"},"institutions":[{"id":"https://openalex.org/I29617571","display_name":"University of Indonesia","ror":"https://ror.org/0116zj450","country_code":"ID","type":"education","lineage":["https://openalex.org/I29617571"]}],"countries":["ID"],"is_corresponding":true,"raw_author_name":"Kurniawati Azizah","raw_affiliation_strings":["Faculty of Computer Science Faculty, Universitas Indonesia, Depok, Indonesia"],"raw_orcid":"https://orcid.org/0000-0002-3217-7025","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science Faculty, Universitas Indonesia, Depok, Indonesia","institution_ids":["https://openalex.org/I29617571"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069933043","display_name":"Wisnu Jatmiko","orcid":"https://orcid.org/0000-0002-0530-7955"},"institutions":[{"id":"https://openalex.org/I29617571","display_name":"University of Indonesia","ror":"https://ror.org/0116zj450","country_code":"ID","type":"education","lineage":["https://openalex.org/I29617571"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Wisnu Jatmiko","raw_affiliation_strings":["Faculty of Computer Science Faculty, Universitas Indonesia, Depok, Indonesia"],"raw_orcid":"https://orcid.org/0000-0002-0530-7955","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science Faculty, Universitas Indonesia, Depok, Indonesia","institution_ids":["https://openalex.org/I29617571"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5066073372"],"corresponding_institution_ids":["https://openalex.org/I29617571"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":2.0831,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.88507098,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"10","issue":null,"first_page":"5895","last_page":"5911"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8186821341514587},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6431671380996704},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.6134507060050964},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5684190988540649},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47650304436683655},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.4638727903366089},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4601401090621948},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.453968346118927},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.45389530062675476},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4347565770149231},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.41130346059799194},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36743873357772827}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8186821341514587},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6431671380996704},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.6134507060050964},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5684190988540649},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47650304436683655},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.4638727903366089},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4601401090621948},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.453968346118927},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.45389530062675476},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4347565770149231},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.41130346059799194},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36743873357772827},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2022.3141200","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3141200","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9668973/09673749.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:b7fce48ac7b14c24880e694c25a4bd25","is_oa":true,"landing_page_url":"https://doaj.org/article/b7fce48ac7b14c24880e694c25a4bd25","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 10, Pp 5895-5911 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2022.3141200","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3141200","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9668973/09673749.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6100000143051147,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G5166413833","display_name":null,"funder_award_id":"NKB-547/UN2.RST/HKP.05.00/2021","funder_id":"https://openalex.org/F4320323819","funder_display_name":"Universitas Indonesia"}],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320323819","display_name":"Universitas Indonesia","ror":"https://ror.org/0116zj450"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4206596421.pdf","grobid_xml":"https://content.openalex.org/works/W4206596421.grobid-xml"},"referenced_works_count":80,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2046056978","https://openalex.org/W2095734449","https://openalex.org/W2107860279","https://openalex.org/W2150769028","https://openalex.org/W2165698076","https://openalex.org/W2201142001","https://openalex.org/W2290689761","https://openalex.org/W2331128040","https://openalex.org/W2395579298","https://openalex.org/W2612434969","https://openalex.org/W2748488820","https://openalex.org/W2769810959","https://openalex.org/W2774140536","https://openalex.org/W2775336875","https://openalex.org/W2786672974","https://openalex.org/W2887171382","https://openalex.org/W2887280559","https://openalex.org/W2888968865","https://openalex.org/W2889028433","https://openalex.org/W2890964092","https://openalex.org/W2895654193","https://openalex.org/W2900796929","https://openalex.org/W2903853691","https://openalex.org/W2914049472","https://openalex.org/W2939833446","https://openalex.org/W2947256846","https://openalex.org/W2960427821","https://openalex.org/W2962699523","https://openalex.org/W2962788625","https://openalex.org/W2963035245","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2963796886","https://openalex.org/W2963945466","https://openalex.org/W2964243274","https://openalex.org/W2967957380","https://openalex.org/W2969521066","https://openalex.org/W2972394484","https://openalex.org/W2972440097","https://openalex.org/W2972443522","https://openalex.org/W2972961496","https://openalex.org/W2973032144","https://openalex.org/W2973034126","https://openalex.org/W2976159681","https://openalex.org/W2982037672","https://openalex.org/W2990124956","https://openalex.org/W2992285938","https://openalex.org/W3010925296","https://openalex.org/W3015826515","https://openalex.org/W3016050488","https://openalex.org/W3019993940","https://openalex.org/W3090474612","https://openalex.org/W3096086473","https://openalex.org/W3109182305","https://openalex.org/W3168542456","https://openalex.org/W3168656614","https://openalex.org/W3199044515","https://openalex.org/W4287758476","https://openalex.org/W6631190155","https://openalex.org/W6683085343","https://openalex.org/W6687645958","https://openalex.org/W6732048897","https://openalex.org/W6734815144","https://openalex.org/W6736057607","https://openalex.org/W6737575990","https://openalex.org/W6738277540","https://openalex.org/W6748573829","https://openalex.org/W6748816842","https://openalex.org/W6749489859","https://openalex.org/W6750489868","https://openalex.org/W6752888775","https://openalex.org/W6755135894","https://openalex.org/W6757322325","https://openalex.org/W6765653190","https://openalex.org/W6770838157","https://openalex.org/W6770983605","https://openalex.org/W6778159118","https://openalex.org/W6779259078","https://openalex.org/W7039088390"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2149220986","https://openalex.org/W1493012537","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1999004162","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2023466863","https://openalex.org/W2696990509"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"network":[2,218],"(DNN)-based":[3],"systems":[4],"generally":[5],"require":[6],"large":[7],"amounts":[8],"of":[9,52,66,116,175,178,253],"training":[10,161,176,201,243],"data,":[11],"so":[12],"they":[13,35],"have":[14,23,37],"data":[15,53,177],"scarcity":[16,54],"problems":[17],"in":[18,25,55,197,227,247,267],"low-resource":[19,60,91,179],"languages.":[20,181],"Recent":[21],"studies":[22],"succeeded":[24,246],"building":[26],"zero-shot":[27,67,117,228],"multi-speaker":[28],"DNN-based":[29,57],"TTS":[30,58,98,124,137,146,169,240],"on":[31,40,59,103,257],"high-resource":[32,105],"languages,":[33],"but":[34],"still":[36],"unsatisfactory":[38],"performance":[39,65,115],"unseen":[41,71,259],"speakers.":[42,72],"This":[43],"study":[44],"addresses":[45],"two":[46],"main":[47],"problems:":[48],"overcoming":[49],"the":[50,56,64,90,108,114,133,168,186,205,234,249,254,258],"problem":[51,92],"languages":[61,271],"and":[62,99,139,219,242,265,269],"improving":[63],"speaker":[68,101,118,135,142,220,225,229,250],"adaptation":[69,230],"for":[70,136,154],"We":[73,148,156,210],"propose":[74,121],"a":[75,82,104,122,172],"novel":[76],"multi-stage":[77],"transfer":[78,86,188],"learning":[79,87,189],"strategy":[80,162,244],"using":[81,171,185,199],"partial":[83],"network-based":[84],"deep":[85],"to":[88,112,165,203,233],"overcome":[89],"by":[93,263],"utilizing":[94],"pre-trained":[95],"monolingual":[96],"single-speaker":[97],"d-vector":[100],"encoder":[102,217],"language":[106],"as":[107],"source":[109],"domain.":[110],"Meanwhile,":[111],"improve":[113],"adaptation,":[119],"we":[120],"new":[123],"model":[125,241],"that":[126,158,213],"incorporates":[127],"an":[128,140],"explicit":[129],"style":[130,216],"control":[131],"from":[132],"target":[134,180],"conditioning":[138],"utterance-level":[141],"reconstruction":[143,221],"loss":[144,222],"during":[145],"training.":[147],"use":[149],"publicly":[150],"available":[151],"speech":[152,194,256],"datasets":[153],"experiments.":[155],"show":[157],"our":[159,214,238],"proposed":[160,187,215,239],"is":[163],"able":[164],"effectively":[166],"train":[167],"models":[170,183,206],"limited":[173],"amount":[174],"The":[182],"trained":[184],"successfully":[190],"produce":[191],"intelligible":[192],"natural":[193],"sounds,":[195],"while":[196],"contrast":[198],"standard":[200],"fails":[202],"make":[204],"synthesize":[207],"understandable":[208],"speech.":[209],"also":[211],"demonstrate":[212],"significantly":[223],"improves":[224],"similarity":[226,252],"task":[231],"compared":[232],"baseline":[235],"model.":[236],"Overall,":[237],"has":[245],"increasing":[248],"cosine":[251],"synthesized":[255],"speakers":[260],"test":[261],"set":[262],"0.468":[264],"0.279":[266],"native":[268],"foreign":[270],"respectively.":[272]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":5}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
