{"id":"https://openalex.org/W2972939746","doi":"https://doi.org/10.21437/ssw.2019-12","title":"Statistical Voice Conversion with Quasi-periodic WaveNet Vocoder","display_name":"Statistical Voice Conversion with Quasi-periodic WaveNet Vocoder","publication_year":2019,"publication_date":"2019-09-14","ids":{"openalex":"https://openalex.org/W2972939746","doi":"https://doi.org/10.21437/ssw.2019-12","mag":"2972939746"},"language":"en","primary_location":{"id":"doi:10.21437/ssw.2019-12","is_oa":false,"landing_page_url":"https://doi.org/10.21437/ssw.2019-12","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"10th ISCA Workshop on Speech Synthesis (SSW 10)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037001032","display_name":"Yi-Chiao Wu","orcid":"https://orcid.org/0000-0003-4390-1354"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yi-Chiao Wu","raw_affiliation_strings":["Graduate School of Informatics, Nagoya University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Nagoya University, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050897938","display_name":"Patrick Lumban Tobing","orcid":"https://orcid.org/0000-0003-2792-8418"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Patrick Lumban Tobing","raw_affiliation_strings":["Graduate School of Informatics, Nagoya University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Nagoya University, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078778981","display_name":"Tomoki Hayashi","orcid":"https://orcid.org/0000-0001-8782-4093"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoki Hayashi","raw_affiliation_strings":["Graduate School of Information Science, Nagoya University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nagoya University, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037417355","display_name":"Kazuhiro Kobayashi","orcid":"https://orcid.org/0000-0001-7801-147X"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuhiro Kobayashi","raw_affiliation_strings":["Information Technology Center, Nagoya University, Japan"],"affiliations":[{"raw_affiliation_string":"Information Technology Center, Nagoya University, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078330211","display_name":"Tomoki Toda","orcid":"https://orcid.org/0000-0001-8146-1279"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoki Toda","raw_affiliation_strings":["Information Technology Center, Nagoya University, Japan"],"affiliations":[{"raw_affiliation_string":"Information Technology Center, Nagoya University, Japan","institution_ids":["https://openalex.org/I60134161"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5037001032"],"corresponding_institution_ids":["https://openalex.org/I60134161"],"apc_list":null,"apc_paid":null,"fwci":0.4335,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.7225236,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"63","last_page":"68"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7885288000106812},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7078119516372681},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5673048496246338},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5302422046661377},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4730626344680786},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.44193899631500244},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4119938313961029},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3562513589859009},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2836250066757202},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12102389335632324}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7885288000106812},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7078119516372681},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5673048496246338},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5302422046661377},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4730626344680786},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.44193899631500244},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4119938313961029},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3562513589859009},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2836250066757202},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12102389335632324},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/ssw.2019-12","is_oa":false,"landing_page_url":"https://doi.org/10.21437/ssw.2019-12","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"10th ISCA Workshop on Speech Synthesis (SSW 10)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1628655565","display_name":null,"funder_award_id":"JPMJPR1657","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3291250701","display_name":null,"funder_award_id":"KAKENHI","funder_id":"https://openalex.org/F4320338111","funder_display_name":"Precursory Research for Embryonic Science and Technology"},{"id":"https://openalex.org/G3497787681","display_name":null,"funder_award_id":"JPMJPR1657","funder_id":"https://openalex.org/F4320338111","funder_display_name":"Precursory Research for Embryonic Science and Technology"},{"id":"https://openalex.org/G6466568207","display_name":"Development of fundamental technology for speech and sound event processing based on complementary use of air- and body-conducted sound signals","funder_award_id":"17H01763","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320338111","display_name":"Precursory Research for Embryonic Science and Technology","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1509691205","https://openalex.org/W1935012542","https://openalex.org/W1977362459","https://openalex.org/W2017425464","https://openalex.org/W2049686551","https://openalex.org/W2086796102","https://openalex.org/W2120605154","https://openalex.org/W2121387787","https://openalex.org/W2151626637","https://openalex.org/W2156142001","https://openalex.org/W2156477760","https://openalex.org/W2157412983","https://openalex.org/W2290463584","https://openalex.org/W2294351487","https://openalex.org/W2471520273","https://openalex.org/W2512087624","https://openalex.org/W2518172956","https://openalex.org/W2518312472","https://openalex.org/W2519091744","https://openalex.org/W2532494225","https://openalex.org/W2746474733","https://openalex.org/W2749651610","https://openalex.org/W2786868129","https://openalex.org/W2797310469","https://openalex.org/W2802455234","https://openalex.org/W2802935216","https://openalex.org/W2803595463","https://openalex.org/W2888932932","https://openalex.org/W2889064624","https://openalex.org/W2889329491","https://openalex.org/W2891813127","https://openalex.org/W2903365642","https://openalex.org/W2919752630","https://openalex.org/W2962850167","https://openalex.org/W2962896155","https://openalex.org/W2963035245","https://openalex.org/W2963411216","https://openalex.org/W2963840672","https://openalex.org/W2964243274","https://openalex.org/W2973115941"],"related_works":["https://openalex.org/W2164147372","https://openalex.org/W2550171623","https://openalex.org/W4253660971","https://openalex.org/W1909292483","https://openalex.org/W1428730622","https://openalex.org/W1658560081","https://openalex.org/W2146616055","https://openalex.org/W2899676847","https://openalex.org/W596245619","https://openalex.org/W2254895521"],"abstract_inverted_index":{"In":[0],"this":[1,92],"paper,":[2],"we":[3],"investigate":[4],"the":[5,32,51,59,87,94,108,118,121,138,146,158,166,176],"effectiveness":[6],"of":[7,50],"a":[8,15,21,71,101,113,131],"quasi-periodic":[9],"WaveNet":[10,25],"(QPNet)":[11],"vocoder":[12,27,61,96,140,160,169],"combined":[13],"with":[14],"statistical":[16],"spectral":[17,125,152],"conversion":[18,23,40,89],"technique":[19],"for":[20],"voice":[22,39,88],"task.The":[24],"(WN)":[26],"has":[28],"been":[29],"applied":[30],"as":[31],"waveform":[33],"generation":[34],"module":[35],"in":[36,86],"many":[37],"different":[38],"frameworks":[41],"and":[42,55,68,111,136,150],"achieves":[43,161],"significant":[44],"improvement":[45],"over":[46],"conventional":[47],"vocoders.However,":[48],"because":[49],"fixed":[52],"dilated":[53,103],"convolution":[54,104],"generic":[56],"network":[57,73,116],"architecture,":[58],"WN":[60,119,168,178],"lacks":[62],"robustness":[63],"against":[64],"unseen":[65],"input":[66,124],"features":[67,126],"often":[69],"requires":[70],"huge":[72],"size":[74],"to":[75,83,106,175],"achieve":[76],"acceptable":[77],"speech":[78,143,173],"quality.Such":[79],"limitations":[80],"usually":[81],"lead":[82],"performance":[84,164],"degradation":[85],"task.To":[90],"overcome":[91],"problem,":[93],"QPNet":[95,139,159],"is":[97],"applied,":[98],"which":[99],"includes":[100],"pitch-dependent":[102],"component":[105],"enhance":[107],"pitch":[109],"controllability":[110],"attain":[112],"more":[114],"compact":[115],"than":[117,165],"vocoder.In":[120],"proposed":[122],"method,":[123],"are":[127],"first":[128],"converted":[129,142,148],"using":[130],"framewise":[132],"deep":[133],"neural":[134],"network,":[135],"then":[137],"generates":[141],"conditioned":[144],"on":[145],"linearly":[147],"prosodic":[149],"transformed":[151],"features.The":[153],"experimental":[154],"results":[155],"confirm":[156],"that":[157],"significantly":[162],"better":[163],"same-size":[167],"while":[170],"maintaining":[171],"comparable":[172],"quality":[174],"double-size":[177],"vocoder.":[179]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
