{"id":"https://openalex.org/W2747914378","doi":"https://doi.org/10.21437/interspeech.2017-246","title":"An RNN-Based Quantized F0 Model with Multi-Tier Feedback Links for Text-to-Speech Synthesis","display_name":"An RNN-Based Quantized F0 Model with Multi-Tier Feedback Links for Text-to-Speech Synthesis","publication_year":2017,"publication_date":"2017-08-16","ids":{"openalex":"https://openalex.org/W2747914378","doi":"https://doi.org/10.21437/interspeech.2017-246","mag":"2747914378"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2017-246","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-246","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/b828e030-f2a4-462e-8cb1-bb20b737fcf7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100327839","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0001-8246-0606"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]},{"id":"https://openalex.org/I200475212","display_name":"The Graduate University for Advanced Studies, SOKENDAI","ror":"https://ror.org/0516ah480","country_code":"JP","type":"education","lineage":["https://openalex.org/I200475212"]},{"id":"https://openalex.org/I113766436","display_name":"Surugadai University","ror":"https://ror.org/00c4wmy51","country_code":"JP","type":"education","lineage":["https://openalex.org/I113766436"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Xin Wang","raw_affiliation_strings":["National Institute of Informatics, Japan","SOKENDAI University, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"SOKENDAI University, Japan","institution_ids":["https://openalex.org/I113766436","https://openalex.org/I200475212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062895056","display_name":"Shinji Takaki","orcid":"https://orcid.org/0000-0001-7294-7699"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinji Takaki","raw_affiliation_strings":["National Institute of Informatics, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I200475212","display_name":"The Graduate University for Advanced Studies, SOKENDAI","ror":"https://ror.org/0516ah480","country_code":"JP","type":"education","lineage":["https://openalex.org/I200475212"]},{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]},{"id":"https://openalex.org/I113766436","display_name":"Surugadai University","ror":"https://ror.org/00c4wmy51","country_code":"JP","type":"education","lineage":["https://openalex.org/I113766436"]}],"countries":["GB","JP"],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["SOKENDAI University, Japan","National Institute of Informatics, Japan","University of Edinburgh, UK"],"affiliations":[{"raw_affiliation_string":"SOKENDAI University, Japan","institution_ids":["https://openalex.org/I113766436","https://openalex.org/I200475212"]},{"raw_affiliation_string":"National Institute of Informatics, Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100327839"],"corresponding_institution_ids":["https://openalex.org/I113766436","https://openalex.org/I184597095","https://openalex.org/I200475212"],"apc_list":null,"apc_paid":null,"fwci":4.8756,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.96095111,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1059","last_page":"1063"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6652432084083557},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6403791308403015},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6347215175628662},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6242720484733582},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5697266459465027},{"id":"https://openalex.org/keywords/correlation","display_name":"Correlation","score":0.5003175735473633},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.4995241165161133},{"id":"https://openalex.org/keywords/pitch-contour","display_name":"Pitch contour","score":0.4772208333015442},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.458680659532547},{"id":"https://openalex.org/keywords/symbol","display_name":"Symbol (formal)","score":0.45233747363090515},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.43337082862854004},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42616069316864014},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4215218424797058},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3701842725276947},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35802310705184937},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2263159155845642},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07778078317642212}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6652432084083557},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6403791308403015},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6347215175628662},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6242720484733582},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5697266459465027},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.5003175735473633},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.4995241165161133},{"id":"https://openalex.org/C2777895490","wikidata":"https://www.wikidata.org/wiki/Q7198848","display_name":"Pitch contour","level":2,"score":0.4772208333015442},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.458680659532547},{"id":"https://openalex.org/C134400042","wikidata":"https://www.wikidata.org/wiki/Q2372244","display_name":"Symbol (formal)","level":2,"score":0.45233747363090515},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.43337082862854004},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42616069316864014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4215218424797058},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3701842725276947},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35802310705184937},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2263159155845642},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07778078317642212},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2017-246","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-246","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/b828e030-f2a4-462e-8cb1-bb20b737fcf7","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/b828e030-f2a4-462e-8cb1-bb20b737fcf7","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Wang, X, Takaki, S & Yamagishi, J 2017, An RNN-based Quantized F0 Model with Multi-tier Feedback Links for Text-to-Speech Synthesis. in Proceedings Interspeech 2017. Interspeech, International Speech Communication Association, pp. 1059-1063, Interspeech 2017, Stockholm, Sweden, 20/08/17. https://doi.org/10.21437/Interspeech.2017-246","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/b828e030-f2a4-462e-8cb1-bb20b737fcf7","is_oa":false,"landing_page_url":"http://hdl.handle.net/20.500.11820/b828e030-f2a4-462e-8cb1-bb20b737fcf7","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/b828e030-f2a4-462e-8cb1-bb20b737fcf7","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/b828e030-f2a4-462e-8cb1-bb20b737fcf7","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Wang, X, Takaki, S & Yamagishi, J 2017, An RNN-based Quantized F0 Model with Multi-tier Feedback Links for Text-to-Speech Synthesis. in Proceedings Interspeech 2017. Interspeech, International Speech Communication Association, pp. 1059-1063, Interspeech 2017, Stockholm, Sweden, 20/08/17. https://doi.org/10.21437/Interspeech.2017-246","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W8081149","https://openalex.org/W36903255","https://openalex.org/W113106864","https://openalex.org/W159170959","https://openalex.org/W167581994","https://openalex.org/W304834817","https://openalex.org/W648786980","https://openalex.org/W1500192039","https://openalex.org/W1512429158","https://openalex.org/W1525554764","https://openalex.org/W1540083112","https://openalex.org/W1810943226","https://openalex.org/W1931877416","https://openalex.org/W1973746598","https://openalex.org/W1990505856","https://openalex.org/W1995118599","https://openalex.org/W1996550570","https://openalex.org/W2013020033","https://openalex.org/W2029434926","https://openalex.org/W2030501716","https://openalex.org/W2034277951","https://openalex.org/W2049686551","https://openalex.org/W2106421426","https://openalex.org/W2106564373","https://openalex.org/W2111284386","https://openalex.org/W2123449470","https://openalex.org/W2154280657","https://openalex.org/W2154920538","https://openalex.org/W2168531172","https://openalex.org/W2174424190","https://openalex.org/W2176263492","https://openalex.org/W2394662942","https://openalex.org/W2485688913","https://openalex.org/W2519091744","https://openalex.org/W2767767449","https://openalex.org/W2963248296","https://openalex.org/W2963932686","https://openalex.org/W3150996319"],"related_works":["https://openalex.org/W3109498233","https://openalex.org/W3164858600","https://openalex.org/W2031768607","https://openalex.org/W2902064555","https://openalex.org/W4200068392","https://openalex.org/W1522063982","https://openalex.org/W196866866","https://openalex.org/W4205278983","https://openalex.org/W4298324454","https://openalex.org/W3161890269"],"abstract_inverted_index":{"A":[0],"recurrent-neural-network-based":[1],"F0":[2,10,21,34,48,74,97,108,121,132,139,150],"model":[3,91,125],"for":[4,113],"text-to-speech":[5],"(TTS)":[6],"synthesis":[7],"that":[8,85,136],"generates":[9],"contours":[11,35,151],"given":[12],"textual":[13],"features":[14,75],"is":[15,26,42,68,84,92],"proposed.":[16],"In":[17],"contrast":[18],"to":[19,28],"related":[20],"models,":[22],"the":[23,30,47,51,55,59,63,78,86,89,94,114,119,123,127,137],"proposed":[24,90,124,138],"one":[25],"designed":[27],"learn":[29],"temporal":[31],"correlation":[32,41,64],"of":[33,50,58,88,103,129],"at":[36],"multiple":[37],"levels.":[38],"The":[39],"frame-level":[40],"covered":[43],"by":[44,72],"feeding":[45],"back":[46],"output":[49,87],"previous":[52],"frame":[53],"as":[54],"additional":[56],"input":[57],"current":[60],"frame;":[61],"meanwhile,":[62],"over":[65,77],"long-time":[66],"spans":[67],"similarly":[69],"modeled":[70],"but":[71,99],"using":[73,118,144],"aggregated":[76],"phoneme":[79],"and":[80,110],"syllable.":[81],"Another":[82],"difference":[83],"not":[93],"interpolated":[95,131],"continuous-valued":[96],"contour":[98],"rather":[100],"a":[101,111,145],"sequence":[102],"discrete":[104,120],"symbols,":[105,122],"including":[106],"quantized":[107],"levels":[109],"symbol":[112],"unvoiced":[115],"condition.":[116],"By":[117],"avoids":[126],"influence":[128],"artificially":[130],"curves.":[133],"Experiments":[134],"demonstrated":[135],"model,":[140],"which":[141],"was":[142],"trained":[143],"dropout":[146],"strategy,":[147],"generated":[148],"smooth":[149],"with":[152],"relatively":[153],"better":[154],"perceived":[155],"quality":[156],"than":[157],"those":[158],"from":[159],"baseline":[160],"RNN":[161],"models.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":11},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
