{"id":"https://openalex.org/W2395380967","doi":"https://doi.org/10.1109/icassp.2016.7472734","title":"Wavelet-based decomposition of F0 as a secondary task for DNN-based speech synthesis with multi-task learning","display_name":"Wavelet-based decomposition of F0 as a secondary task for DNN-based speech synthesis with multi-task learning","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2395380967","doi":"https://doi.org/10.1109/icassp.2016.7472734","mag":"2395380967"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7472734","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472734","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/db737070-bd03-4b37-adac-b96434a9be3f","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109494794","display_name":"Manuel Sam Ribeiro","orcid":"https://orcid.org/0000-0001-8096-2231"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Manuel Sam Ribeiro","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110238677","display_name":"Oliver Watts","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Oliver Watts","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB","JP"],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan","The Centre for Speech Technology Research, University of Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084488961","display_name":"Robert A. Clark","orcid":"https://orcid.org/0000-0002-4892-3619"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Robert A. J. Clark","raw_affiliation_strings":["The Centre for Speech Technology Research, University of Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Centre for Speech Technology Research, University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.6499,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.91854542,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"5525","last_page":"5529"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7463809847831726},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7008192539215088},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6320129632949829},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.6212711930274963},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5901913642883301},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5284269452095032},{"id":"https://openalex.org/keywords/wavelet","display_name":"Wavelet","score":0.5166974067687988},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.504499077796936},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.45958369970321655},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.45203107595443726},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44072383642196655}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7463809847831726},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7008192539215088},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6320129632949829},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.6212711930274963},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5901913642883301},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5284269452095032},{"id":"https://openalex.org/C47432892","wikidata":"https://www.wikidata.org/wiki/Q831390","display_name":"Wavelet","level":2,"score":0.5166974067687988},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.504499077796936},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.45958369970321655},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.45203107595443726},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44072383642196655},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icassp.2016.7472734","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472734","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/db737070-bd03-4b37-adac-b96434a9be3f","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/db737070-bd03-4b37-adac-b96434a9be3f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ribeiro, M S, Watts, O, Yamagishi, J & Clark, R 2016, Wavelet-based decomposition of F0 as a secondary task for DNN-based speech synthesis with multi-task learning. in 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Institute of Electrical and Electronics Engineers, pp. 5525-5529, 41st IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2016, Shanghai, China, 20/03/16. https://doi.org/10.1109/ICASSP.2016.7472734","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/db737070-bd03-4b37-adac-b96434a9be3f","is_oa":false,"landing_page_url":"https://www.research.ed.ac.uk/portal/en/publications/waveletbased-decomposition-of-f0-as-a-secondary-task-for-dnnbased-speech-synthesis-with-multitask-learning(db737070-bd03-4b37-adac-b96434a9be3f).html","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/db737070-bd03-4b37-adac-b96434a9be3f","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/db737070-bd03-4b37-adac-b96434a9be3f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ribeiro, M S, Watts, O, Yamagishi, J & Clark, R 2016, Wavelet-based decomposition of F0 as a secondary task for DNN-based speech synthesis with multi-task learning. in 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Institute of Electrical and Electronics Engineers, pp. 5525-5529, 41st IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2016, Shanghai, China, 20/03/16. https://doi.org/10.1109/ICASSP.2016.7472734","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6700000166893005}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W397522103","https://openalex.org/W1499332833","https://openalex.org/W1517939602","https://openalex.org/W1540083112","https://openalex.org/W1990505856","https://openalex.org/W2034139177","https://openalex.org/W2078597717","https://openalex.org/W2094035326","https://openalex.org/W2102003408","https://openalex.org/W2111284386","https://openalex.org/W2117130368","https://openalex.org/W2127589467","https://openalex.org/W2134973740","https://openalex.org/W2195693064","https://openalex.org/W2296111744","https://openalex.org/W2330979245","https://openalex.org/W2394662942","https://openalex.org/W2395980997","https://openalex.org/W2402019113","https://openalex.org/W2600829178","https://openalex.org/W2913340405","https://openalex.org/W4300985418","https://openalex.org/W6669986114","https://openalex.org/W6712034360","https://openalex.org/W6712239235","https://openalex.org/W6736010183"],"related_works":["https://openalex.org/W2088933974","https://openalex.org/W2387192134","https://openalex.org/W1600688796","https://openalex.org/W2514064218","https://openalex.org/W2132658536","https://openalex.org/W2094188777","https://openalex.org/W2112609279","https://openalex.org/W1581825163","https://openalex.org/W2399404538","https://openalex.org/W1483316057"],"abstract_inverted_index":{"We":[0,43],"investigate":[1],"two":[2,104],"wavelet-based":[3],"decomposition":[4,28],"strategies":[5,105],"of":[6,34,51,61],"the":[7,40,49,52,59,74,85,91,131,139],"f0":[8,70],"signal":[9],"and":[10,78],"their":[11],"usefulness":[12],"as":[13,108],"a":[14,31,45,109,127],"secondary":[15,110],"task":[16,111],"for":[17,36,130],"speech":[18],"synthesis":[19],"using":[20,133],"multi-task":[21,113,134],"deep":[22,114],"neural":[23,115],"networks":[24,116],"(MTL-DNN).":[25],"The":[26],"first":[27],"strategy":[29],"uses":[30],"static":[32],"set":[33],"scales":[35],"all":[37],"utterances":[38],"in":[39,112],"training":[41],"data.":[42],"propose":[44],"second":[46],"strategy,":[47],"where":[48],"scale":[50],"mother":[53],"wavelet":[54,86],"is":[55,66,126],"dynamically":[56],"adjusted":[57],"to":[58,68,73,88,99,138],"rate":[60],"each":[62],"utterance.":[63],"This":[64,81],"approach":[65],"able":[67],"capture":[69],"variations":[71],"related":[72],"syllable,":[75],"word,":[76],"clitic-group,":[77],"phrase":[79],"units.":[80],"method":[82],"also":[83],"constrains":[84],"components":[87],"be":[89,100],"within":[90],"frequency":[92],"range":[93],"that":[94,120],"previous":[95],"experiments":[96],"have":[97],"shown":[98],"more":[101],"natural.":[102],"These":[103],"are":[106],"evaluated":[107],"(MTL-DNNs).":[117],"Results":[118],"indicate":[119],"on":[121],"an":[122],"expressive":[123],"dataset":[124],"there":[125],"strong":[128],"preference":[129],"systems":[132],"learning":[135],"when":[136],"compared":[137],"baseline":[140],"system.":[141]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
