{"id":"https://openalex.org/W4408355978","doi":"https://doi.org/10.1109/icassp49660.2025.10890527","title":"Discrete Unit-based Low-latency Multi-lingual Speech Synthesis for LIMMITS\u201925 Challenge","display_name":"Discrete Unit-based Low-latency Multi-lingual Speech Synthesis for LIMMITS\u201925 Challenge","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355978","doi":"https://doi.org/10.1109/icassp49660.2025.10890527"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890527","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086017661","display_name":"Yu Jiang","orcid":"https://orcid.org/0000-0002-0728-5673"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yu Jiang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Cheng Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4387153335","display_name":"China Telecom","ror":"https://ror.org/05p67dv18","country_code":null,"type":"company","lineage":["https://openalex.org/I4387153335"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Gong","raw_affiliation_strings":["Institute of Artificial Intelligence (TeleAI), China Telecom"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence (TeleAI), China Telecom","institution_ids":["https://openalex.org/I4210136246","https://openalex.org/I4387153335"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049261843","display_name":"Tianrui Wang","orcid":"https://orcid.org/0009-0005-1517-9589"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianrui Wang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028353824","display_name":"Chunyu Qiang","orcid":"https://orcid.org/0009-0007-2290-3074"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunyu Qiang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032698563","display_name":"Haoyu Wang","orcid":"https://orcid.org/0000-0002-3604-4799"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Wang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050293960","display_name":"Qiuyu Liu","orcid":"https://orcid.org/0009-0000-9494-646X"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiuyu Liu","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006690717","display_name":"Yuheng Lu","orcid":"https://orcid.org/0000-0002-2767-0894"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuheng Lu","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103015888","display_name":"Xiaobao Wang","orcid":"https://orcid.org/0000-0001-5086-4964"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobao Wang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100450091","display_name":"Xiao-Lei Zhang","orcid":"https://orcid.org/0000-0001-7694-193X"},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4387153335","display_name":"China Telecom","ror":"https://ror.org/05p67dv18","country_code":null,"type":"company","lineage":["https://openalex.org/I4387153335"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolei Zhang","raw_affiliation_strings":["Institute of Artificial Intelligence (TeleAI), China Telecom"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence (TeleAI), China Telecom","institution_ids":["https://openalex.org/I4210136246","https://openalex.org/I4387153335"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101745213","display_name":"Longbiao Wang","orcid":"https://orcid.org/0000-0002-8094-6861"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longbiao Wang","raw_affiliation_strings":["Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application, College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017251198","display_name":"Jianwu Dang","orcid":"https://orcid.org/0000-0002-9237-4821"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwu Dang","raw_affiliation_strings":["Chinese Academy of Sciences,Shenzhen Institute of Advanced Technology,China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Shenzhen Institute of Advanced Technology,China","institution_ids":["https://openalex.org/I4210145761","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5086017661"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01853889,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7474363446235657},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5608119964599609},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5567281246185303},{"id":"https://openalex.org/keywords/unit","display_name":"Unit (ring theory)","score":0.47752702236175537},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.456811785697937},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4157235026359558},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.17887303233146667},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11518022418022156},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0820067822933197}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7474363446235657},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5608119964599609},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5567281246185303},{"id":"https://openalex.org/C122637931","wikidata":"https://www.wikidata.org/wiki/Q118084","display_name":"Unit (ring theory)","level":2,"score":0.47752702236175537},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.456811785697937},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4157235026359558},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.17887303233146667},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11518022418022156},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0820067822933197},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890527","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W4402301063","https://openalex.org/W4409763559","https://openalex.org/W6780218876","https://openalex.org/W6862435102"],"related_works":["https://openalex.org/W611386996","https://openalex.org/W2371343292","https://openalex.org/W3205411230","https://openalex.org/W4286899009","https://openalex.org/W9168048","https://openalex.org/W4300849822","https://openalex.org/W4376480820","https://openalex.org/W3155891479","https://openalex.org/W3029351463","https://openalex.org/W4308600690"],"abstract_inverted_index":{"In":[0,28,43],"this":[1],"paper,":[2],"we":[3,31,46],"present":[4],"the":[5,13,33,53],"system":[6],"developed":[7],"by":[8,51],"our":[9,74,102],"team,":[10],"CCATTS,":[11],"for":[12],"LIMMITS\u201925":[14],"challenge,":[15],"focusing":[16],"on":[17],"few-shot":[18],"and":[19,37,69,88],"zero-shot":[20],"TTS.":[21,42],"We":[22],"adopt":[23],"a":[24,48,82],"two-stage":[25],"TTS":[26],"strategy.":[27],"track":[29,44],"1,":[30],"fine-tune":[32],"pre-trained":[34],"ZMM-TTS":[35],"model":[36,55],"successfully":[38],"achieve":[39],"multilingual":[40],"low-latency":[41],"2,":[45],"propose":[47],"token-based":[49],"framework":[50],"modifying":[52],"first-stage":[54],"of":[56,64,81],"ZMM-TTS,":[57],"disentangling":[58],"speech":[59],"features":[60],"into":[61],"four":[62],"types":[63],"discrete":[65],"tokens\u2014Content,":[66],"Acoustic,":[67],"Emotion,":[68],"Speaker\u2014and":[70],"integrating":[71],"it":[72],"with":[73],"designed":[75],"token2wav":[76],"module.":[77],"This":[78],"module":[79],"consists":[80],"HiFi-GAN-style":[83],"decoder,":[84],"an":[85],"acoustic":[86],"refiner,":[87],"U-Net":[89],"flow":[90],"matching,":[91],"to":[92],"generate":[93],"high-quality":[94],"speech.":[95],"The":[96],"official":[97],"competition":[98],"results":[99],"demonstrate":[100],"that":[101],"method":[103],"achieves":[104],"strong":[105],"performance":[106],"in":[107],"both":[108],"tracks.":[109]},"counts_by_year":[],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
