{"id":"https://openalex.org/W4391892546","doi":"https://doi.org/10.1109/taslp.2024.3363414","title":"Accented Text-to-Speech Synthesis With Limited Data","display_name":"Accented Text-to-Speech Synthesis With Limited Data","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391892546","doi":"https://doi.org/10.1109/taslp.2024.3363414"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3363414","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3363414","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1109/taslp.2024.3363414","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016792332","display_name":"Xuehao Zhou","orcid":"https://orcid.org/0000-0003-4367-2053"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Xuehao Zhou","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070281927","display_name":"Mingyang Zhang","orcid":"https://orcid.org/0000-0002-7945-0535"},"institutions":[{"id":"https://openalex.org/I4210099586","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210099586"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyang Zhang","raw_affiliation_strings":["Shenzhen Research Institute of Big Data, School of Data Science, The Chinese University of Hong Kong, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Big Data, School of Data Science, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924","https://openalex.org/I4210099586"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101846956","display_name":"Yi Zhou","orcid":"https://orcid.org/0000-0002-8520-8227"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yi Zhou","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102765381","display_name":"Zhizheng Wu","orcid":"https://orcid.org/0009-0001-1192-9857"},"institutions":[{"id":"https://openalex.org/I4210099586","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210099586"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhizheng Wu","raw_affiliation_strings":["Shenzhen Research Institute of Big Data, School of Data Science, The Chinese University of Hong Kong, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Big Data, School of Data Science, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924","https://openalex.org/I4210099586"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]},{"id":"https://openalex.org/I4210099586","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210099586"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["Shenzhen Research Institute of Big Data, School of Data Science, The Chinese University of Hong Kong, Shenzhen, China","Kriston AI, Xiamen, China","Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Big Data, School of Data Science, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924","https://openalex.org/I4210099586"]},{"raw_affiliation_string":"Kriston AI, Xiamen, China","institution_ids":[]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5016792332"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":5.1226,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.95746844,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"32","issue":null,"first_page":"1699","last_page":"1711"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5796895027160645},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5673133134841919},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49368366599082947},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.47185444831848145},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3502991795539856},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33184969425201416},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06313875317573547}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5796895027160645},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5673133134841919},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49368366599082947},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.47185444831848145},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3502991795539856},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33184969425201416},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06313875317573547}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3363414","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3363414","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2024.3363414","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3363414","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.47999998927116394}],"awards":[{"id":"https://openalex.org/G2799040894","display_name":null,"funder_award_id":"62271432","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4327894984","display_name":null,"funder_award_id":"62376237","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G861645084","display_name":null,"funder_award_id":"B10120210117-KP02","funder_id":"https://openalex.org/F4320322942","funder_display_name":"Chinese University of Hong Kong"}],"funders":[{"id":"https://openalex.org/F4320320696","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322942","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W13231650","https://openalex.org/W107619411","https://openalex.org/W1965568387","https://openalex.org/W1975163393","https://openalex.org/W2000198779","https://openalex.org/W2015419244","https://openalex.org/W2030600769","https://openalex.org/W2071315723","https://openalex.org/W2091653553","https://openalex.org/W2102003408","https://openalex.org/W2111898715","https://openalex.org/W2129142580","https://openalex.org/W2133853764","https://openalex.org/W2135510989","https://openalex.org/W2191668891","https://openalex.org/W2295934277","https://openalex.org/W2400517318","https://openalex.org/W2414861844","https://openalex.org/W2504939716","https://openalex.org/W2511006507","https://openalex.org/W2514457011","https://openalex.org/W2746132399","https://openalex.org/W2889048668","https://openalex.org/W2903739847","https://openalex.org/W2962788625","https://openalex.org/W2963609956","https://openalex.org/W2963945466","https://openalex.org/W2963971656","https://openalex.org/W2964243274","https://openalex.org/W2972359262","https://openalex.org/W2972437137","https://openalex.org/W2972903407","https://openalex.org/W2998572311","https://openalex.org/W3015338123","https://openalex.org/W3015680182","https://openalex.org/W3024747869","https://openalex.org/W3034420534","https://openalex.org/W3094785744","https://openalex.org/W3095537162","https://openalex.org/W3095545636","https://openalex.org/W3135644023","https://openalex.org/W3155483372","https://openalex.org/W3160329778","https://openalex.org/W3168542456","https://openalex.org/W3194347141","https://openalex.org/W4221155853","https://openalex.org/W4233300967","https://openalex.org/W4285412695","https://openalex.org/W4296973425","https://openalex.org/W4297841629","https://openalex.org/W4308614654","https://openalex.org/W4385245566","https://openalex.org/W6603838645","https://openalex.org/W6631190155","https://openalex.org/W6730349644","https://openalex.org/W6749489859","https://openalex.org/W6752888775","https://openalex.org/W6754473786","https://openalex.org/W6763832098","https://openalex.org/W6777445600","https://openalex.org/W6778823374","https://openalex.org/W6794395149","https://openalex.org/W6843477509","https://openalex.org/W6847740417"],"related_works":["https://openalex.org/W2216757598","https://openalex.org/W234770729","https://openalex.org/W4387496629","https://openalex.org/W3013209356","https://openalex.org/W2489800615","https://openalex.org/W3188962172","https://openalex.org/W2772917594","https://openalex.org/W4312825515","https://openalex.org/W4306742369","https://openalex.org/W3204019825"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"an":[3,40,48],"accented":[4,33,41,49,62,71,155,162,176,186],"text-to-speech":[5],"(TTS)":[6],"synthesis":[7],"framework":[8,35],"with":[9,52,138,159,181],"limited":[10,100,183],"training":[11],"data.":[12],"We":[13],"study":[14],"two":[15,38],"aspects":[16],"concerning":[17],"accent":[18,203,211],"rendering:":[19],"phonetic":[20,67,163,170],"(phoneme":[21],"difference)":[22],"and":[23,27,47,55,123,140,148,199,210],"prosodic":[24,77,195],"(pitch":[25],"pattern":[26],"phoneme":[28],"duration)":[29],"variations.":[30],"The":[31,61,132,201],"proposed":[32],"TTS":[34,156,177],"consists":[36],"of":[37,89,102,113,172,185],"models:":[39],"front-end":[42,63],"for":[43,58,104,130],"grapheme-to-phoneme":[44],"(G2P)":[45],"conversion":[46],"acoustic":[50,72,178],"model":[51,73,179],"integrated":[53],"pitch":[54,198],"duration":[56],"predictors":[57],"phoneme-to-Mel-spectrogram":[59],"prediction.":[60],"directly":[64],"models":[65,81,134],"the":[66,70,76,93,105,109,154,169,175,194],"variation,":[68],"while":[69,174],"explicitly":[74],"controls":[75],"variation.":[78],"Specifically,":[79],"both":[80],"are":[82,96,128,135],"first":[83],"pretrained":[84,133],"on":[85,98],"a":[86,99,160,182],"large":[87],"amount":[88,101,184],"data,":[90],"then":[91,136],"only":[92],"accent-related":[94],"layers":[95],"fine-tuned":[97,137,158,180],"data":[103,112,188],"target":[106],"accent.":[107],"In":[108],"experiments,":[110],"speech":[111,187,208],"three":[114],"English":[115,125,143],"accents,":[116,144,173],"i.e.,":[117],"General":[118,141],"American":[119],"English,":[120,122],"Irish":[121],"British":[124],"Received":[126],"Pronunciation,":[127],"used":[129],"pre-training.":[131],"Scottish":[139],"Australian":[142],"respectively.":[145],"Both":[146],"objective":[147],"subjective":[149],"evaluation":[150],"results":[151],"show":[152],"that":[153],"frontend":[157],"small":[161],"lexicon":[164],"(5k":[165],"words)":[166],"effectively":[167,192],"handles":[168],"variation":[171],"(approximately":[189],"3":[190],"minutes)":[191],"improves":[193],"rendering":[196],"including":[197],"duration.":[200],"overall":[202],"modeling":[204],"contributes":[205],"to":[206],"improved":[207],"quality":[209],"similarity.":[212]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
