{"id":"https://openalex.org/W2921047845","doi":"https://doi.org/10.23919/apsipa.2018.8659465","title":"Prosody-aware subword embedding considering Japanese intonation systems and its application to DNN-based multi-dialect speech synthesis","display_name":"Prosody-aware subword embedding considering Japanese intonation systems and its application to DNN-based multi-dialect speech synthesis","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2921047845","doi":"https://doi.org/10.23919/apsipa.2018.8659465","mag":"2921047845"},"language":"en","primary_location":{"id":"doi:10.23919/apsipa.2018.8659465","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659465","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049408893","display_name":"Takanori Akiyama","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takanori Akiyama","raw_affiliation_strings":["Graduate School of Information Science and Technology, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013050263","display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinnosuke Takamichi","raw_affiliation_strings":["Graduate School of Information Science and Technology, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003814223","display_name":"Hiroshi Saruwatari","orcid":"https://orcid.org/0000-0003-0876-5617"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi Saruwatari","raw_affiliation_strings":["Graduate School of Information Science and Technology, The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5049408893"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.6515,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78037493,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"659","last_page":"664"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.8839174509048462},{"id":"https://openalex.org/keywords/intonation","display_name":"Intonation (linguistics)","score":0.8001747131347656},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7873744964599609},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6668180227279663},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5748593211174011},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5560281276702881},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.548302948474884},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5379979610443115},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5114342570304871},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.49769237637519836},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4519587755203247},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.20534569025039673}],"concepts":[{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.8839174509048462},{"id":"https://openalex.org/C2781045179","wikidata":"https://www.wikidata.org/wiki/Q5576720","display_name":"Intonation (linguistics)","level":2,"score":0.8001747131347656},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7873744964599609},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6668180227279663},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5748593211174011},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5560281276702881},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.548302948474884},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5379979610443115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5114342570304871},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.49769237637519836},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4519587755203247},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.20534569025039673},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/apsipa.2018.8659465","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659465","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W72347498","https://openalex.org/W179875071","https://openalex.org/W1523372075","https://openalex.org/W2005522781","https://openalex.org/W2046056978","https://openalex.org/W2095723991","https://openalex.org/W2102003408","https://openalex.org/W2109444541","https://openalex.org/W2129142580","https://openalex.org/W2148708890","https://openalex.org/W2150769028","https://openalex.org/W2156387975","https://openalex.org/W2293049663","https://openalex.org/W2395578248","https://openalex.org/W2401698713","https://openalex.org/W2471520273","https://openalex.org/W2516321201","https://openalex.org/W2519091744","https://openalex.org/W2519648275","https://openalex.org/W2605320104","https://openalex.org/W2607404225","https://openalex.org/W2658996865","https://openalex.org/W2745526875","https://openalex.org/W2746498480","https://openalex.org/W2765486990","https://openalex.org/W2806666542","https://openalex.org/W2962784628","https://openalex.org/W2962916039","https://openalex.org/W6602935006","https://openalex.org/W6631309588","https://openalex.org/W6674546848","https://openalex.org/W6675380101","https://openalex.org/W6682026795","https://openalex.org/W6682889407","https://openalex.org/W6711777497","https://openalex.org/W6736204136"],"related_works":["https://openalex.org/W1965454423","https://openalex.org/W2028501571","https://openalex.org/W2052542215","https://openalex.org/W2036564641","https://openalex.org/W10581632","https://openalex.org/W1927421023","https://openalex.org/W3149582125","https://openalex.org/W157238252","https://openalex.org/W2169632867","https://openalex.org/W2465421051"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"prosody-aware":[3,52,85,114],"subword":[4,86,94,151],"embedding":[5,54,87,129,157],"considering":[6,88,99,123],"Japanese":[7,43,89,138,141,178],"intonation":[8,90],"systems":[9],"and":[10,66,101,156],"its":[11],"application":[12],"to":[13,37,77,126,135],"DNN":[14],"(deep":[15],"neural":[16],"network)-based":[17],"multi-dialect":[18,142,147,170],"speech":[19,27,143,174],"synthesis.":[20,144],"In":[21,145],"accordance":[22],"with":[23],"recent":[24],"improvements":[25],"of":[26],"synthesis":[28],"in":[29,60,176],"rich-resourced":[30],"languages,":[31],"the":[32,58,128,133,146,168],"research":[33],"trend":[34],"is":[35,97],"shifting":[36],"more":[38],"challenging":[39],"languages":[40],"such":[41],"as":[42],"dialects":[44,155],"that":[45,167],"still":[46],"have":[47],"undefined":[48],"prosodic":[49],"contexts.":[50],"Conventional":[51],"word":[53,108],"can":[55,104,172],"unsupervisedly":[56],"extract":[57],"contexts":[59,71],"a":[61,119],"data-driven":[62],"manner":[63],"using":[64],"words":[65,74],"F0":[67],"sequences.":[68],"However,":[69],"accurate":[70],"for":[72,113],"unknown":[73,107],"are":[75],"difficult":[76],"generate.":[78],"To":[79],"solve":[80],"this":[81],"problem,":[82],"we":[83,149],"propose":[84,118,150],"systems.":[91],"The":[92,163],"unsupervised":[93],"model,":[95],"which":[96],"trained":[98],"language":[100],"acoustic":[102],"characteristics,":[103],"tokenize":[105],"an":[106],"into":[109],"known":[110],"subwords":[111],"suitable":[112],"embedding.":[115],"We":[116,131],"also":[117,140],"modulation":[120],"filtering":[121],"method":[122],"intra-subword":[124],"moras":[125],"improve":[127,173],"accuracies.":[130],"apply":[132],"methods":[134,171],"not":[136],"only":[137],"but":[139],"case,":[148],"models":[152,158],"shared":[153],"among":[154],"conditioned":[159],"by":[160],"dialect":[161],"information.":[162],"experimental":[164],"evaluation":[165],"demonstrates":[166],"proposed":[169],"quality":[175],"some":[177],"dialects.":[179]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
