{"id":"https://openalex.org/W4403674685","doi":"https://doi.org/10.1109/taslp.2024.3485466","title":"E$^{3}$TTS: End-to-End Text-Based Speech Editing TTS System and Its Applications","display_name":"E$^{3}$TTS: End-to-End Text-Based Speech Editing TTS System and Its Applications","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4403674685","doi":"https://doi.org/10.1109/taslp.2024.3485466"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3485466","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3485466","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101245167","display_name":"Zheng Liang","orcid":"https://orcid.org/0009-0009-3314-3079"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zheng Liang","raw_affiliation_strings":["X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100438492","display_name":"Ziyang Ma","orcid":"https://orcid.org/0000-0002-0623-9114"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziyang Ma","raw_affiliation_strings":["X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035532752","display_name":"Chenpeng Du","orcid":"https://orcid.org/0000-0001-5329-0847"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenpeng Du","raw_affiliation_strings":["X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043098653","display_name":"Kai Yu","orcid":"https://orcid.org/0000-0002-7102-9826"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Yu","raw_affiliation_strings":["X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102771326","display_name":"Xie Chen","orcid":"https://orcid.org/0000-0001-7423-617X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xie Chen","raw_affiliation_strings":["X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"X-LANCE Lab, Department of Computer Science and Engineering, MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101245167"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.3415,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.67413116,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"32","issue":null,"first_page":"4810","last_page":"4821"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.932200014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9258000254631042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6866266131401062},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5772261023521423},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5705070495605469},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5643749237060547},{"id":"https://openalex.org/keywords/end-user","display_name":"End user","score":0.4210565686225891},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.33203285932540894},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2236582636833191},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.13990050554275513},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.062475234270095825}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6866266131401062},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5772261023521423},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5705070495605469},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5643749237060547},{"id":"https://openalex.org/C91262260","wikidata":"https://www.wikidata.org/wiki/Q528074","display_name":"End user","level":2,"score":0.4210565686225891},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.33203285932540894},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2236582636833191},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.13990050554275513},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.062475234270095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3485466","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3485466","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3278014984","display_name":null,"funder_award_id":"U23B2018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6999136879","display_name":null,"funder_award_id":"62206171","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2102003408","https://openalex.org/W2107860279","https://openalex.org/W2150658333","https://openalex.org/W2154920538","https://openalex.org/W2428180336","https://openalex.org/W2737697117","https://openalex.org/W2883989418","https://openalex.org/W2886319145","https://openalex.org/W2889012072","https://openalex.org/W2891616026","https://openalex.org/W2896457183","https://openalex.org/W2903250132","https://openalex.org/W2903739847","https://openalex.org/W2936547119","https://openalex.org/W2939069254","https://openalex.org/W2939757332","https://openalex.org/W2962780374","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2972359262","https://openalex.org/W2972625221","https://openalex.org/W2972702443","https://openalex.org/W3007376164","https://openalex.org/W3015265920","https://openalex.org/W3015639015","https://openalex.org/W3041561163","https://openalex.org/W3095918555","https://openalex.org/W3096263335","https://openalex.org/W3096615836","https://openalex.org/W3097912232","https://openalex.org/W3160438760","https://openalex.org/W3161782335","https://openalex.org/W3162244132","https://openalex.org/W3174102645","https://openalex.org/W3198020407","https://openalex.org/W3198533616","https://openalex.org/W3198694222","https://openalex.org/W3202725408","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3217003515","https://openalex.org/W4210811812","https://openalex.org/W4224918838","https://openalex.org/W4225264678","https://openalex.org/W4225596771","https://openalex.org/W4281770669","https://openalex.org/W4283722828","https://openalex.org/W4296068407","https://openalex.org/W4297841480","https://openalex.org/W4297841565","https://openalex.org/W4297841733","https://openalex.org/W4319862717","https://openalex.org/W4372260307","https://openalex.org/W4375869198","https://openalex.org/W4385571941","https://openalex.org/W4385822790","https://openalex.org/W4385823092","https://openalex.org/W4385823152","https://openalex.org/W4393147067","https://openalex.org/W4402672020","https://openalex.org/W6631362777","https://openalex.org/W6638749077","https://openalex.org/W6745697700","https://openalex.org/W6753855596","https://openalex.org/W6763832098","https://openalex.org/W6767111847","https://openalex.org/W6778823374","https://openalex.org/W6779337556","https://openalex.org/W6780815891","https://openalex.org/W6783867762","https://openalex.org/W6795261426","https://openalex.org/W6796464841","https://openalex.org/W6810007534","https://openalex.org/W6810189000","https://openalex.org/W6838843145"],"related_works":["https://openalex.org/W4299590256","https://openalex.org/W2151749779","https://openalex.org/W3163634122","https://openalex.org/W2110442089","https://openalex.org/W3119482857","https://openalex.org/W2919182614","https://openalex.org/W2166381389","https://openalex.org/W4393280045","https://openalex.org/W4403582721","https://openalex.org/W2054736184"],"abstract_inverted_index":{"Text-based":[0],"speech":[1,35,53,63,70,73,84,97,127,186],"editing":[2,36,54,98,187],"aims":[3],"at":[4,86,191],"manipulating":[5,89],"part":[6],"of":[7,27,108,154,183],"real":[8],"audio":[9,157],"by":[10,19,88],"modifying":[11],"the":[12,24,90,132,150,155,184],"corresponding":[13],"transcribed":[14],"text,":[15],"without":[16],"being":[17],"discernible":[18],"human":[20],"auditory":[21],"system.":[22],"With":[23],"enhanced":[25],"capability":[26],"neural":[28],"Text-to-speech":[29],"(TTS),":[30],"researchers":[31],"try":[32],"to":[33,130,159],"tackle":[34],"problems":[37],"with":[38,174],"TTS":[39,55],"methods.":[40,163],"In":[41],"this":[42,192],"paper,":[43],"we":[44,116],"propose":[45],"E<inline-formula":[46,75,118,145],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[47,76,119,143,146,194],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[48,77,120,147],"notation=\"LaTeX\">$^{3}$</tex-math></inline-formula>TTS,":[49],"a.k.a.":[50],"end-to-end":[51],"text-based":[52],"system,":[56],"which":[57,109],"combines":[58],"a":[59,62,66],"text":[60],"encoder,":[61,64],"and":[65,72,82,104,138,152,181],"joint":[67],"net":[68],"for":[69,125],"synthesis":[71],"editing.":[74],"notation=\"LaTeX\">$^{3}$</tex-math></inline-formula>TTS":[78,121,148],"can":[79],"insert,":[80],"replace,":[81],"delete":[83],"content":[85],"will,":[87],"given":[91],"text.":[92],"Experiments":[93],"show":[94,167],"that":[95],"our":[96],"outperforms":[99],"strong":[100],"baselines":[101],"on":[102],"HiFiTTS":[103],"LibriTTS":[105],"datasets,":[106],"speakers":[107],"are":[110,189],"seen":[111],"or":[112],"unseen,":[113],"respectively.":[114],"Further,":[115],"introduce":[117],"into":[122],"data":[123,133,161,177],"augmentation":[124,162],"automatic":[126],"recognition":[128,141],"(ASR)":[129],"mitigate":[131],"insufficiency":[134],"problem":[135],"in":[136],"code-switching":[137],"named":[139],"entity":[140],"scenarios<sup":[142],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>.":[144],"retains":[149],"coherence":[151],"reality":[153],"recorded":[156],"compared":[158],"past":[160],"The":[164,179],"experimental":[165],"results":[166],"significant":[168],"performance":[169],"improvements":[170],"over":[171],"baseline":[172],"systems":[173],"traditional":[175],"TTS-based":[176],"augmentation.":[178],"code":[180],"samples":[182],"proposed":[185],"model":[188],"available":[190],"repository.<sup":[193],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>":[195]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
