{"id":"https://openalex.org/W3016223359","doi":"https://doi.org/10.1109/icassp40776.2020.9054466","title":"Semi-Supervised Learning Based on Hierarchical Generative Models for End-to-End Speech Synthesis","display_name":"Semi-Supervised Learning Based on Hierarchical Generative Models for End-to-End Speech Synthesis","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3016223359","doi":"https://doi.org/10.1109/icassp40776.2020.9054466","mag":"3016223359"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054466","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054466","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039729197","display_name":"Takato Fujimoto","orcid":null},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takato Fujimoto","raw_affiliation_strings":["Nagoya Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya Institute of Technology, Japan","institution_ids":["https://openalex.org/I197274945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062895056","display_name":"Shinji Takaki","orcid":"https://orcid.org/0000-0001-7294-7699"},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinji Takaki","raw_affiliation_strings":["Nagoya Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya Institute of Technology, Japan","institution_ids":["https://openalex.org/I197274945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067594810","display_name":"Kei Hashimoto","orcid":"https://orcid.org/0000-0003-2081-0396"},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kei Hashimoto","raw_affiliation_strings":["Nagoya Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya Institute of Technology, Japan","institution_ids":["https://openalex.org/I197274945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049909008","display_name":"Keiichiro Oura","orcid":null},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keiichiro Oura","raw_affiliation_strings":["Nagoya Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya Institute of Technology, Japan","institution_ids":["https://openalex.org/I197274945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023240652","display_name":"Yoshihiko Nankaku","orcid":null},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshihiko Nankaku","raw_affiliation_strings":["Nagoya Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya Institute of Technology, Japan","institution_ids":["https://openalex.org/I197274945"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103023678","display_name":"Keiichi Tokuda","orcid":"https://orcid.org/0000-0001-6143-0133"},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keiichi Tokuda","raw_affiliation_strings":["Nagoya Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya Institute of Technology, Japan","institution_ids":["https://openalex.org/I197274945"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5039729197"],"corresponding_institution_ids":["https://openalex.org/I197274945"],"apc_list":null,"apc_paid":null,"fwci":0.2651,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.61434876,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"7644","last_page":"7648"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.9194365739822388},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7913022637367249},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.7339974045753479},{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.7109335660934448},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6723304390907288},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5888574719429016},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.548663318157196},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4916256070137024},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.4730719029903412},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4411681592464447},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33687666058540344},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09277850389480591}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.9194365739822388},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7913022637367249},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.7339974045753479},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.7109335660934448},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6723304390907288},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5888574719429016},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.548663318157196},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4916256070137024},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.4730719029903412},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4411681592464447},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33687666058540344},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09277850389480591},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054466","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054466","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8100000023841858}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W854541894","https://openalex.org/W1522301498","https://openalex.org/W1866230956","https://openalex.org/W1959608418","https://openalex.org/W2064675550","https://openalex.org/W2129142580","https://openalex.org/W2133564696","https://openalex.org/W2327501763","https://openalex.org/W2394921947","https://openalex.org/W2423557781","https://openalex.org/W2519091744","https://openalex.org/W2749651610","https://openalex.org/W2892140764","https://openalex.org/W2945078028","https://openalex.org/W2948238043","https://openalex.org/W2949382160","https://openalex.org/W2952711665","https://openalex.org/W2952838738","https://openalex.org/W2963403868","https://openalex.org/W2963568578","https://openalex.org/W2963636093","https://openalex.org/W2963691546","https://openalex.org/W2963827314","https://openalex.org/W2963945466","https://openalex.org/W2964002616","https://openalex.org/W2964121744","https://openalex.org/W2964243274","https://openalex.org/W2964308564","https://openalex.org/W2972694856","https://openalex.org/W2973043900","https://openalex.org/W2996573371","https://openalex.org/W4289383906","https://openalex.org/W4385245566","https://openalex.org/W6623517193","https://openalex.org/W6631190155","https://openalex.org/W6639317949","https://openalex.org/W6640963894","https://openalex.org/W6679434410","https://openalex.org/W6712208827","https://openalex.org/W6739901393","https://openalex.org/W6749489859","https://openalex.org/W6754925833","https://openalex.org/W6755300632","https://openalex.org/W6755879856","https://openalex.org/W6762242920","https://openalex.org/W6763316926","https://openalex.org/W6768443183"],"related_works":["https://openalex.org/W4391272374","https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W40885451","https://openalex.org/W2108985546","https://openalex.org/W2081919107","https://openalex.org/W2433276473","https://openalex.org/W1537411440","https://openalex.org/W1984347656","https://openalex.org/W2535215250"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,18],"general":[4],"framework":[5],"of":[6,38,81,85,88,100],"semi-supervised":[7,75],"learning":[8,76],"based":[9,73],"on":[10,74],"hierarchical":[11],"generative":[12],"models":[13],"and":[14,62,91,110,122],"adapts":[15],"it":[16,49],"to":[17,36,52,59],"Japanese":[19],"end-to-end":[20,28,56,71],"text-to-speech":[21],"(TTS)":[22],"system.":[23],"In":[24],"English":[25],"TTS,":[26],"several":[27],"systems":[29],"have":[30],"recently":[31],"achieved":[32],"sound":[33],"quality":[34],"close":[35],"that":[37,77,115],"natural":[39],"human":[40],"speech.":[41],"However,":[42],"in":[43],"non-alphabetic":[44],"languages":[45],"such":[46],"as":[47,93],"Japanese,":[48],"is":[50],"difficult":[51],"realize":[53],"true":[54],"text-input":[55],"TTS":[57,72],"due":[58],"character":[60],"diversity":[61],"pitch":[63],"accents.":[64],"To":[65,96],"address":[66],"this":[67],"problem,":[68],"we":[69],"propose":[70],"makes":[78],"the":[79,98,101,116],"most":[80],"existing":[82],"data":[83],"consisting":[84],"any":[86],"combination":[87],"text,":[89],"phoneme,":[90],"waveform":[92],"training":[94],"data.":[95],"demonstrate":[97],"effectiveness":[99],"proposed":[102,117],"system,":[103],"listening":[104],"tests":[105],"were":[106],"conducted":[107],"for":[108],"pronunciation":[109,121],"naturalness.":[111,123],"Our":[112],"results":[113],"show":[114],"system":[118],"improves":[119],"both":[120]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
