{"id":"https://openalex.org/W2965648961","doi":"https://doi.org/10.1587/transinf.2018edp7344","title":"Pre-Training of DNN-Based Speech Synthesis Based on Bidirectional Conversion between Text and Speech","display_name":"Pre-Training of DNN-Based Speech Synthesis Based on Bidirectional Conversion between Text and Speech","publication_year":2019,"publication_date":"2019-07-31","ids":{"openalex":"https://openalex.org/W2965648961","doi":"https://doi.org/10.1587/transinf.2018edp7344","mag":"2965648961"},"language":"en","primary_location":{"id":"doi:10.1587/transinf.2018edp7344","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2018edp7344","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E102.D/8/E102.D_2018EDP7344/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://www.jstage.jst.go.jp/article/transinf/E102.D/8/E102.D_2018EDP7344/_pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056820648","display_name":"Kentaro Sone","orcid":null},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kentaro SONE","raw_affiliation_strings":["Graduate School of Informatics and Engineering, The University of Electro-Communications"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics and Engineering, The University of Electro-Communications","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041887072","display_name":"Toru Nakashika","orcid":"https://orcid.org/0000-0003-1863-6771"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toru NAKASHIKA","raw_affiliation_strings":["Graduate School of Informatics and Engineering, The University of Electro-Communications"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics and Engineering, The University of Electro-Communications","institution_ids":["https://openalex.org/I20529979"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5056820648"],"corresponding_institution_ids":["https://openalex.org/I20529979"],"apc_list":null,"apc_paid":null,"fwci":0.1445,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.57636079,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"E102.D","issue":"8","first_page":"1546","last_page":"1553"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8262496590614319},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.7117775082588196},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6686899065971375},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.6540534496307373},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6527234315872192},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.6025369167327881},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5713709592819214},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.51127028465271},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46987301111221313},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.42595940828323364},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.30946433544158936},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07726573944091797}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8262496590614319},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.7117775082588196},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6686899065971375},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.6540534496307373},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6527234315872192},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.6025369167327881},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5713709592819214},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.51127028465271},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46987301111221313},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.42595940828323364},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.30946433544158936},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07726573944091797},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1587/transinf.2018edp7344","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2018edp7344","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E102.D/8/E102.D_2018EDP7344/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1587/transinf.2018edp7344","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2018edp7344","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E102.D/8/E102.D_2018EDP7344/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2340638847","display_name":null,"funder_award_id":"ACT-I","funder_id":"https://openalex.org/F4320334789","funder_display_name":"Japan Science and Technology Agency"}],"funders":[{"id":"https://openalex.org/F4320325763","display_name":"Telecommunications Advancement Foundation","ror":"https://ror.org/05y77zf79"},{"id":"https://openalex.org/F4320334789","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2965648961.pdf","grobid_xml":"https://content.openalex.org/works/W2965648961.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W177847060","https://openalex.org/W2039800941","https://openalex.org/W2045158511","https://openalex.org/W2049036695","https://openalex.org/W2057609679","https://openalex.org/W2093450784","https://openalex.org/W2095425517","https://openalex.org/W2100495367","https://openalex.org/W2102003408","https://openalex.org/W2111284386","https://openalex.org/W2126143605","https://openalex.org/W2129142580","https://openalex.org/W2136922672","https://openalex.org/W2136936677","https://openalex.org/W2150658333","https://openalex.org/W2154920538","https://openalex.org/W2160815625","https://openalex.org/W2584032004","https://openalex.org/W2618530766","https://openalex.org/W2790969636"],"related_works":["https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W2108985546","https://openalex.org/W2038801705","https://openalex.org/W2433276473","https://openalex.org/W1537411440","https://openalex.org/W2024201202","https://openalex.org/W2535215250","https://openalex.org/W2049083033","https://openalex.org/W290673751"],"abstract_inverted_index":{"Conventional":[0],"approaches":[1],"to":[2,17,31,58,66,74,128,161,171,225],"statistical":[3,104],"parametric":[4,105],"speech":[5,19,78,106,241,247,274],"synthesis":[6,79,107,248,301],"use":[7],"context-dependent":[8],"hidden":[9,145],"Markov":[10],"models":[11],"(HMMs)":[12],"clustered":[13],"using":[14,255],"decision":[15,25,45],"trees":[16,26,46],"generate":[18],"parameters":[20,190,239,290],"from":[21,71,156,222],"linguistic":[22,37,72,136,212,216],"features.":[23,137],"However,":[24],"are":[27,232,294],"not":[28],"always":[29],"appropriate":[30],"model":[32,115,187],"complex":[33],"context":[34],"dependencies":[35,70,155,175],"of":[36,91,122,133,199,267,291],"features":[38,73,210,217,220,231],"efficiently.":[39],"An":[40],"alternative":[41],"scheme":[42],"that":[43,251,280,298],"replaces":[44],"with":[47,139],"deep":[48,113,201],"neural":[49],"networks":[50],"(DNNs)":[51],"was":[52],"presented":[53],"as":[54,297],"a":[55,82,85,98,112,119,141,166,200,246],"possible":[56],"way":[57],"overcome":[59],"the":[60,64,89,92,130,173,181,186,204,228,265,288,295,300],"difficulty.":[61],"By":[62],"training":[63,268],"network":[65],"represent":[67,153,172],"high-dimensional":[68],"feedforward":[69,154],"acoustic":[75,134,209,219,230],"features,":[76,213],"DNN-based":[77,103,253,261,284],"systems":[80,254],"convert":[81],"text":[83],"into":[84],"speech.":[86],"To":[87],"improved":[88],"naturalness":[90],"synthesized":[93],"speech,":[94],"this":[95],"paper":[96],"presents":[97],"novel":[99],"pre-training":[100],"method":[101,235,258,282,293],"for":[102,240],"systems.":[108],"In":[109],"our":[110,234,256,281,292],"method,":[111],"relational":[114],"(DRM),":[116],"which":[117],"represents":[118],"joint":[120,131],"probability":[121],"two":[123,148,177,194],"visible":[124,149,158,163,178],"variables,":[125],"is":[126,270],"applied":[127],"describe":[129],"distribution":[132],"and":[135,147,197,214],"As":[138],"DNNs,":[140],"DRM":[142,167],"consists":[143],"several":[144],"layers":[146],"layers.":[150],"Although":[151],"DNNs":[152],"one":[157],"variables":[159,164],"(inputs)":[160],"other":[162],"(outputs),":[165],"has":[168],"an":[169],"ability":[170],"bidirectional":[174,205],"between":[176,193,207],"variables.":[179],"During":[180],"maximum-likelihood":[182],"(ML)":[183],"-based":[184],"training,":[185],"optimizes":[188],"its":[189],"(connection":[191],"weights":[192],"adjacent":[195],"layers,":[196],"biases)":[198],"architecture":[202],"considering":[203,226],"conversion":[206],"1)":[208],"given":[211,218],"2)":[215],"generated":[221,229],"itself.":[223],"Owing":[224],"whether":[227],"recognizable,":[233],"can":[236],"obtain":[237],"reasonable":[238],"synthesis.":[242],"Experimental":[243],"results":[244,277],"in":[245,299],"task":[249],"show":[250,279],"pre-trained":[252],"proposed":[257],"outperformed":[259,283],"randomly-initialized":[260],"systems,":[262,285],"especially":[263],"when":[264],"amount":[266],"data":[269],"limited.":[271],"Additionally,":[272],"speaker-dependent":[273],"recognition":[275],"experimental":[276],"also":[278],"by":[286],"setting":[287],"initial":[289],"same":[296],"experiments.":[302]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
