{"id":"https://openalex.org/W4385822470","doi":"https://doi.org/10.21437/interspeech.2023-1212","title":"Parameter-Efficient Learning for Text-to-Speech Accent Adaptation","display_name":"Parameter-Efficient Learning for Text-to-Speech Accent Adaptation","publication_year":2023,"publication_date":"2023-08-14","ids":{"openalex":"https://openalex.org/W4385822470","doi":"https://doi.org/10.21437/interspeech.2023-1212"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2023-1212","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-1212","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016250453","display_name":"Li-Jen Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Li-Jen Yang","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020376803","display_name":"Chao-Han Huck Yang","orcid":"https://orcid.org/0000-0003-2879-8811"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chao-Han Huck Yang","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061908942","display_name":"Jen\u2010Tzung Chien","orcid":"https://orcid.org/0000-0003-3466-8941"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Jen-Tzung Chien","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5061908942"],"corresponding_institution_ids":["https://openalex.org/I148366613"],"apc_list":null,"apc_paid":null,"fwci":1.8299,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.86355386,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4354","last_page":"4358"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9466999769210815,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9028000235557556,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.7778425216674805},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7545467615127563},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.7216476202011108},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6262353658676147},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44461941719055176},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4303067922592163},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14918109774589539}],"concepts":[{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.7778425216674805},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7545467615127563},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.7216476202011108},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6262353658676147},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44461941719055176},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4303067922592163},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14918109774589539},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2023-1212","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-1212","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W200659727","https://openalex.org/W2002417021","https://openalex.org/W2015631279","https://openalex.org/W2056985955","https://openalex.org/W2165143604","https://openalex.org/W2890964092","https://openalex.org/W2949573729","https://openalex.org/W2962780374","https://openalex.org/W2963211188","https://openalex.org/W2964303773","https://openalex.org/W3015338123","https://openalex.org/W3030437843","https://openalex.org/W3036167779","https://openalex.org/W3036601975","https://openalex.org/W3094002217","https://openalex.org/W3128910262","https://openalex.org/W3144619878","https://openalex.org/W3166396011","https://openalex.org/W3172148458","https://openalex.org/W3172443934","https://openalex.org/W3174784402","https://openalex.org/W3195577433","https://openalex.org/W3201225328","https://openalex.org/W3204696009","https://openalex.org/W3205949070","https://openalex.org/W4205991051","https://openalex.org/W4206471589","https://openalex.org/W4226162428","https://openalex.org/W4226316089","https://openalex.org/W4286981949","https://openalex.org/W4292779060","https://openalex.org/W4296069158","https://openalex.org/W4298312696","https://openalex.org/W4307783813","https://openalex.org/W4312884055","https://openalex.org/W4312933868","https://openalex.org/W4319585922","https://openalex.org/W4320165905","https://openalex.org/W4323066695","https://openalex.org/W4372260195","https://openalex.org/W4372346241"],"related_works":["https://openalex.org/W1571518467","https://openalex.org/W87991986","https://openalex.org/W2020291234","https://openalex.org/W2789919619","https://openalex.org/W4307766460","https://openalex.org/W2293457016","https://openalex.org/W3169305685","https://openalex.org/W2094520212","https://openalex.org/W2001850503","https://openalex.org/W2351428524"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,9,19,44,70],"parameter-efficient":[4,144],"learning":[5],"(PEL)":[6],"to":[7,30,36,68,84,96],"develop":[8],"low-resource":[10],"accent":[11,131],"adaptation":[12,17],"for":[13,56],"text-to-speech":[14],"(TTS).A":[15],"resource-efficient":[16],"from":[18],"frozen":[20],"pre-trained":[21,74],"TTS":[22,57],"model":[23,125,153],"is":[24,66,113],"developed":[25],"by":[26,43,115],"using":[27],"only":[28],"1.2%":[29],"0.8%":[31],"of":[32,47,110],"original":[33],"trainable":[34],"parameters":[35],"achieve":[37,140],"competitive":[38,141],"performance":[39,99,154],"in":[40,82],"voice":[41],"synthesis.Motivated":[42],"theoretical":[45],"foundation":[46],"optimal":[48],"transport":[49],"(OT),":[50],"this":[51,92,111],"study":[52],"carries":[53],"out":[54],"PEL":[55,117],"where":[58],"an":[59],"auxiliary":[60,149],"unsupervised":[61,93,150],"loss":[62,94,151],"based":[63,119],"on":[64,120],"OT":[65],"introduced":[67],"maximize":[69],"difference":[71],"between":[72],"the":[73,78,129,136,148],"source":[75],"domain":[76],"and":[77,124,147],"(unseen)":[79],"target":[80],"domain,":[81],"addition":[83],"its":[85],"supervised":[86],"training":[87],"loss.Further,":[88],"we":[89],"leverage":[90],"upon":[91],"refinement":[95],"boost":[97],"system":[98],"via":[100],"either":[101],"sliced":[102],"Wasserstein":[103],"distance":[104],"or":[105],"maximum":[106],"mean":[107],"discrepancy.The":[108],"merit":[109],"work":[112],"demonstrated":[114],"fulfilling":[116],"solutions":[118],"residual":[121],"adapter":[122],"learning,":[123],"reprogramming":[126],"when":[127],"evaluating":[128],"Mandarin":[130],"adaptation.Experiment":[132],"results":[133],"show":[134],"that":[135],"proposed":[137],"methods":[138],"can":[139],"naturalness":[142],"with":[143],"decoder":[145],"fine-tuning,":[146],"improves":[152],"empirically.":[155]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
