{"id":"https://openalex.org/W4390507420","doi":"https://doi.org/10.1186/s13634-023-01096-x","title":"A multi-task learning speech synthesis optimization method based on CWT: a case study of Tacotron2","display_name":"A multi-task learning speech synthesis optimization method based on CWT: a case study of Tacotron2","publication_year":2024,"publication_date":"2024-01-02","ids":{"openalex":"https://openalex.org/W4390507420","doi":"https://doi.org/10.1186/s13634-023-01096-x"},"language":"en","primary_location":{"id":"doi:10.1186/s13634-023-01096-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13634-023-01096-x","pdf_url":"https://asp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13634-023-01096-x","source":{"id":"https://openalex.org/S35920007","display_name":"EURASIP Journal on Advances in Signal Processing","issn_l":"1687-6172","issn":["1687-6172","1687-6180"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Advances in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://asp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13634-023-01096-x","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025777969","display_name":"Guoqiang Hu","orcid":"https://orcid.org/0000-0001-9333-1498"},"institutions":[{"id":"https://openalex.org/I159948400","display_name":"Jinan University","ror":"https://ror.org/02xe5ns62","country_code":"CN","type":"education","lineage":["https://openalex.org/I159948400"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoqiang Hu","raw_affiliation_strings":["International School, Jinan University, No. 855 Xingye Avenue East, Guangzhou, 511486, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"International School, Jinan University, No. 855 Xingye Avenue East, Guangzhou, 511486, Guangdong, China","institution_ids":["https://openalex.org/I159948400"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102565572","display_name":"Zhuofan Ruan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuofan Ruan","raw_affiliation_strings":["Information Hub, The HONG KONG University of Science and Technology(Guangzhou), No.1 Duxue Road, Guangzhou, 511453, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Hub, The HONG KONG University of Science and Technology(Guangzhou), No.1 Duxue Road, Guangzhou, 511453, Guangdong, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048435685","display_name":"Wenqiu Guo","orcid":"https://orcid.org/0000-0003-0204-0746"},"institutions":[{"id":"https://openalex.org/I111950717","display_name":"Macau University of Science and Technology","ror":"https://ror.org/03jqs2n27","country_code":"MO","type":"education","lineage":["https://openalex.org/I111950717","https://openalex.org/I4391767947"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Wenqiu Guo","raw_affiliation_strings":["School of Business, Macau University of Science and Technology, Avenida Wai Long, Macao, 999078, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Business, Macau University of Science and Technology, Avenida Wai Long, Macao, 999078, China","institution_ids":["https://openalex.org/I111950717"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005465995","display_name":"Yujuan Quan","orcid":"https://orcid.org/0000-0001-6206-7022"},"institutions":[{"id":"https://openalex.org/I159948400","display_name":"Jinan University","ror":"https://ror.org/02xe5ns62","country_code":"CN","type":"education","lineage":["https://openalex.org/I159948400"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yujuan Quan","raw_affiliation_strings":["College of Information Science and Technology, Jinan University, No. 855 Xingye Avenue East, Guangzhou, 511486, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0001-6206-7022","affiliations":[{"raw_affiliation_string":"College of Information Science and Technology, Jinan University, No. 855 Xingye Avenue East, Guangzhou, 511486, Guangdong, China","institution_ids":["https://openalex.org/I159948400"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5005465995"],"corresponding_institution_ids":["https://openalex.org/I159948400"],"apc_list":{"value":1140,"currency":"GBP","value_usd":1398},"apc_paid":{"value":1140,"currency":"GBP","value_usd":1398},"fwci":1.9524,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87114839,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"2024","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9101017713546753},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.745703399181366},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7067431807518005},{"id":"https://openalex.org/keywords/wavelet","display_name":"Wavelet","score":0.588948667049408},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5284519195556641},{"id":"https://openalex.org/keywords/wavelet-transform","display_name":"Wavelet transform","score":0.49145257472991943},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.47112321853637695},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4668980538845062},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.46279287338256836},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07709920406341553}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9101017713546753},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.745703399181366},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7067431807518005},{"id":"https://openalex.org/C47432892","wikidata":"https://www.wikidata.org/wiki/Q831390","display_name":"Wavelet","level":2,"score":0.588948667049408},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5284519195556641},{"id":"https://openalex.org/C196216189","wikidata":"https://www.wikidata.org/wiki/Q2867","display_name":"Wavelet transform","level":3,"score":0.49145257472991943},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.47112321853637695},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4668980538845062},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.46279287338256836},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07709920406341553},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1186/s13634-023-01096-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13634-023-01096-x","pdf_url":"https://asp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13634-023-01096-x","source":{"id":"https://openalex.org/S35920007","display_name":"EURASIP Journal on Advances in Signal Processing","issn_l":"1687-6172","issn":["1687-6172","1687-6180"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Advances in Signal Processing","raw_type":"journal-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-136072","is_oa":false,"landing_page_url":"https://repository.hkust.edu.hk/ir/Record/1783.1-136072","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:744e9b716f2c4c4ea8e9e145138b58b3","is_oa":true,"landing_page_url":"https://doaj.org/article/744e9b716f2c4c4ea8e9e145138b58b3","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"EURASIP Journal on Advances in Signal Processing, Vol 2024, Iss 1, Pp 1-14 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13634-023-01096-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13634-023-01096-x","pdf_url":"https://asp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13634-023-01096-x","source":{"id":"https://openalex.org/S35920007","display_name":"EURASIP Journal on Advances in Signal Processing","issn_l":"1687-6172","issn":["1687-6172","1687-6180"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Advances in Signal Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4300000071525574,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1442721391","display_name":null,"funder_award_id":"2021A1515011999","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G6253617823","display_name":null,"funder_award_id":"21619412","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320322292","display_name":"Jinan University","ror":"https://ror.org/02xe5ns62"},{"id":"https://openalex.org/F4320335666","display_name":"Key Laboratory of Chemical Biology and Traditional Chinese Medicine Research, Ministry of Education","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390507420.pdf"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W210359992","https://openalex.org/W1499332833","https://openalex.org/W1563645159","https://openalex.org/W1981276685","https://openalex.org/W1996021349","https://openalex.org/W2010698716","https://openalex.org/W2022128907","https://openalex.org/W2025198378","https://openalex.org/W2028706510","https://openalex.org/W2063615912","https://openalex.org/W2064675550","https://openalex.org/W2078841894","https://openalex.org/W2122068629","https://openalex.org/W2131774270","https://openalex.org/W2132984323","https://openalex.org/W2145892079","https://openalex.org/W2152328854","https://openalex.org/W2188563164","https://openalex.org/W2230524333","https://openalex.org/W2395380967","https://openalex.org/W2407793339","https://openalex.org/W2532733969","https://openalex.org/W2586807479","https://openalex.org/W2608070932","https://openalex.org/W2791477301","https://openalex.org/W2803023917","https://openalex.org/W2885195348","https://openalex.org/W2885916054","https://openalex.org/W2903739847","https://openalex.org/W2912697496","https://openalex.org/W2913340405","https://openalex.org/W2923054028","https://openalex.org/W2952555815","https://openalex.org/W2963691546","https://openalex.org/W2964243274","https://openalex.org/W2976159681","https://openalex.org/W3021188327","https://openalex.org/W3130016944","https://openalex.org/W3164649044","https://openalex.org/W4225897731","https://openalex.org/W4249528182","https://openalex.org/W4255272544","https://openalex.org/W4283751919","https://openalex.org/W4293339422","https://openalex.org/W4293649366","https://openalex.org/W4297841912","https://openalex.org/W4308840623","https://openalex.org/W4367859498","https://openalex.org/W4375868903","https://openalex.org/W6679338098"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2088854863","https://openalex.org/W4402568167","https://openalex.org/W3179495260","https://openalex.org/W1976719989","https://openalex.org/W2897924318","https://openalex.org/W2138997758","https://openalex.org/W2077021924"],"abstract_inverted_index":{"Abstract":[0],"Text-to-speech":[1,16],"synthesis":[2,89],"plays":[3],"an":[4,25,105,165],"essential":[5],"role":[6],"in":[7,15,41,101,118,144,237],"facilitating":[8],"human-computer":[9],"interaction.":[10],"Currently,":[11],"the":[12,21,34,46,50,58,61,65,69,86,93,96,102,137,152,185,191,197,203,207,217,223,227,230,238,244,249,253,257,261],"predominant":[13],"approach":[14],"acoustic":[17,265],"models":[18],"selects":[19],"only":[20],"Mel":[22,35,62,186],"spectrum":[23,187],"as":[24,164,241,243],"intermediate":[26],"feature":[27,174],"for":[28,202],"converting":[29],"text":[30],"to":[31,45,53,91,131,134,216,226,259],"speech.":[32],"However,":[33],"spectrograms":[36],"obtained":[37],"may":[38],"exhibit":[39],"ambiguity":[40],"some":[42],"aspects":[43],"owing":[44],"limited":[47],"capability":[48,133],"of":[49,60,67,71,95,154,229,246,263],"Fourier":[51],"transform":[52,113],"capture":[54],"mutation":[55],"signals":[56],"during":[57],"acquisition":[59],"spectrograms.":[63,109],"With":[64],"aim":[66],"improving":[68],"clarity":[70,153],"synthesized":[72,156,205],"speech,":[73],"this":[74],"study":[75,103,149,250],"proposes":[76],"a":[77,173],"multi-task":[78,210,234,247],"learning":[79,211,235],"optimization":[80],"method":[81,100,236,255],"and":[82,124,140,170,179],"conducts":[83],"experiments":[84],"on":[85],"Tacotron2":[87,155,239],"speech":[88,122,125,157,204],"system":[90],"demonstrate":[92],"effectiveness":[94,245],"proposed":[97,254],"method.":[98],"The":[99,110],"introduces":[104],"additional":[106],"task:":[107],"wavelet":[108,112,232],"continuous":[111,231],"has":[114,256],"gained":[115],"significant":[116],"popularity":[117],"various":[119],"applications,":[120],"including":[121],"enhancement":[123],"recognition,":[126],"which":[127],"is":[128,177,212],"primarily":[129],"attributed":[130],"its":[132,141],"adaptively":[135],"vary":[136],"time-frequency":[138],"resolution":[139],"excellent":[142],"performance":[143,262],"capturing":[145],"non-stationary":[146],"signals.":[147],"This":[148],"highlights":[150],"that":[151,196,252],"can":[158],"be":[159],"improved":[160],"by":[161,190,206,221],"introducing":[162],"Wavelet-spectrogram":[163,180],"auxiliary":[166],"task":[167],"through":[168],"theoretical":[169],"experimental":[171],"analysis:":[172],"extraction":[175],"network":[176],"added,":[178],"features":[181],"are":[182],"extracted":[183],"from":[184],"output":[188],"generated":[189],"decoder.":[192],"Experimental":[193],"findings":[194],"indicate":[195],"Mean":[198],"Opinion":[199],"Score":[200],"achieved":[201],"model":[208],"using":[209],"0.17":[213],"higher":[214],"compared":[215],"baseline":[218],"model.":[219],"Furthermore,":[220],"analyzing":[222],"factors":[224],"contributing":[225],"success":[228],"transform-based":[233],"model,":[240],"well":[242],"learning,":[248],"conjectures":[251],"potential":[258],"enhance":[260],"other":[264],"models.":[266]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
