{"id":"https://openalex.org/W4408353806","doi":"https://doi.org/10.1109/icassp49660.2025.10888195","title":"MacST: Multi-Accent Speech Synthesis via Text Transliteration for Accent Conversion","display_name":"MacST: Multi-Accent Speech Synthesis via Text Transliteration for Accent Conversion","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408353806","doi":"https://doi.org/10.1109/icassp49660.2025.10888195"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888195","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108413182","display_name":"Sho Inoue","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sho Inoue","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen (CUHK-Shenzhen),School of Data Science,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen (CUHK-Shenzhen),School of Data Science,Shenzhen,China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100328312","display_name":"Shuai Wang","orcid":"https://orcid.org/0000-0002-7897-2024"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuai Wang","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen (CUHK-Shenzhen),School of Data Science,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen (CUHK-Shenzhen),School of Data Science,Shenzhen,China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007446660","display_name":"Wanxing Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanxing Wang","raw_affiliation_strings":["NetEase Inc.,Fuxi AI Lab,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NetEase Inc.,Fuxi AI Lab,Hangzhou,China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050166453","display_name":"Pengcheng Zhu","orcid":"https://orcid.org/0000-0001-9867-7041"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengcheng Zhu","raw_affiliation_strings":["NetEase Inc.,Fuxi AI Lab,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NetEase Inc.,Fuxi AI Lab,Hangzhou,China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036369578","display_name":"Mengxiao Bi","orcid":"https://orcid.org/0009-0007-6680-481X"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengxiao Bi","raw_affiliation_strings":["NetEase Inc.,Fuxi AI Lab,Hangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NetEase Inc.,Fuxi AI Lab,Hangzhou,China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen (CUHK-Shenzhen),School of Data Science,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen (CUHK-Shenzhen),School of Data Science,Shenzhen,China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.0351,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.96250422,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9718000292778015,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.8459503054618835},{"id":"https://openalex.org/keywords/transliteration","display_name":"Transliteration","score":0.8171418905258179},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7436307668685913},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.7175168395042419},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6337093710899353},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5293111205101013},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5270400643348694}],"concepts":[{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.8459503054618835},{"id":"https://openalex.org/C520968082","wikidata":"https://www.wikidata.org/wiki/Q134550","display_name":"Transliteration","level":2,"score":0.8171418905258179},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7436307668685913},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.7175168395042419},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6337093710899353},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5293111205101013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5270400643348694}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888195","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4300000071525574}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W2030600769","https://openalex.org/W2151952699","https://openalex.org/W2338181011","https://openalex.org/W2888954148","https://openalex.org/W3015430779","https://openalex.org/W3096567388","https://openalex.org/W3135654121","https://openalex.org/W3178546316","https://openalex.org/W3204009030","https://openalex.org/W3209059054","https://openalex.org/W4296069158","https://openalex.org/W4296069266","https://openalex.org/W4297841851","https://openalex.org/W4366492380","https://openalex.org/W4372260574","https://openalex.org/W4372266858","https://openalex.org/W4372341945","https://openalex.org/W4385245566","https://openalex.org/W4385823180","https://openalex.org/W4385823191","https://openalex.org/W4388821525","https://openalex.org/W4391827315","https://openalex.org/W4392910800","https://openalex.org/W4402112533","https://openalex.org/W4402118925","https://openalex.org/W6603838645","https://openalex.org/W6783867762","https://openalex.org/W6805710207","https://openalex.org/W6810738896","https://openalex.org/W6847363464","https://openalex.org/W6850334629","https://openalex.org/W6864326080","https://openalex.org/W6936113694","https://openalex.org/W6936129901"],"related_works":["https://openalex.org/W3153459181","https://openalex.org/W2147866274","https://openalex.org/W2350015575","https://openalex.org/W2371976984","https://openalex.org/W2352160949","https://openalex.org/W2378436233","https://openalex.org/W2251148428","https://openalex.org/W2907809867","https://openalex.org/W1990041434","https://openalex.org/W2069398544"],"abstract_inverted_index":{"In":[0,25],"accented":[1,41,77],"voice":[2],"conversion":[3,54,120],"or":[4],"accent":[5,12,53,95,119],"conversion,":[6],"we":[7,28,84],"seek":[8],"to":[9,75],"convert":[10],"the":[11,45,90,99],"in":[13,118],"speech":[14,36,42],"from":[15],"one":[16],"another":[17],"while":[18],"preserving":[19],"speaker":[20],"identity":[21],"and":[22,105,110],"semantic":[23],"content.":[24],"this":[26],"study,":[27],"formulate":[29],"a":[30,81,86],"novel":[31],"method":[32,101],"for":[33,51,94,102],"creating":[34],"multi-accented":[35],"samples,":[37],"thus":[38],"pairs":[39],"of":[40],"samples":[43],"by":[44,58],"same":[46],"speaker,":[47],"through":[48],"text":[49,61],"transliteration":[50],"training":[52],"systems.":[55],"We":[56,97],"begin":[57],"generating":[59],"transliterated":[60],"with":[62],"Large":[63],"Language":[64],"Models":[65],"(LLMs),":[66],"which":[67],"is":[68],"then":[69],"fed":[70],"into":[71],"multilingual":[72],"TTS":[73],"models":[74],"synthesize":[76],"English":[78,107],"speech.":[79],"As":[80],"reference":[82],"system,":[83],"built":[85],"sequence-to-sequence":[87],"model":[88],"on":[89],"synthetic":[91],"parallel":[92],"corpus":[93],"conversion.":[96],"validated":[98],"proposed":[100],"both":[103],"native":[104],"non-native":[106],"speakers.":[108],"Subjective":[109],"objective":[111],"evaluations":[112],"further":[113],"validate":[114],"our":[115],"dataset\u2019s":[116],"effectiveness":[117],"studies.":[121]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
