{"id":"https://openalex.org/W2963971656","doi":"https://doi.org/10.1109/taslp.2017.2761547","title":"Statistical Parametric Speech Synthesis Incorporating Generative Adversarial Networks","display_name":"Statistical Parametric Speech Synthesis Incorporating Generative Adversarial Networks","publication_year":2017,"publication_date":"2017-10-09","ids":{"openalex":"https://openalex.org/W2963971656","doi":"https://doi.org/10.1109/taslp.2017.2761547","mag":"2963971656"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2017.2761547","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2761547","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083394213","display_name":"Yuki Saito","orcid":"https://orcid.org/0000-0002-7967-2613"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yuki Saito","raw_affiliation_strings":["Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013050263","display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinnosuke Takamichi","raw_affiliation_strings":["Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003814223","display_name":"Hiroshi Saruwatari","orcid":"https://orcid.org/0000-0003-0876-5617"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi Saruwatari","raw_affiliation_strings":["Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5083394213"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":26.4418,"has_fulltext":false,"cited_by_count":227,"citation_normalized_percentile":{"value":0.99616263,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"26","issue":"1","first_page":"84","last_page":"96"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.9495809078216553},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.733942449092865},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.6021127700805664},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5856337547302246},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5834363102912903},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.5636477470397949},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5306362509727478},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.4679255485534668},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.41918328404426575},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39799806475639343},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33129656314849854},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17876973748207092},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10518983006477356},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08827763795852661}],"concepts":[{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.9495809078216553},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.733942449092865},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.6021127700805664},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5856337547302246},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5834363102912903},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.5636477470397949},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5306362509727478},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.4679255485534668},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.41918328404426575},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39799806475639343},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33129656314849854},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17876973748207092},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10518983006477356},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08827763795852661},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2017.2761547","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2761547","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.699999988079071,"display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322525","display_name":"Secom Science and Technology Foundation","ror":"https://ror.org/05ggzej07"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":78,"referenced_works":["https://openalex.org/W58497106","https://openalex.org/W187033940","https://openalex.org/W385466589","https://openalex.org/W1487641199","https://openalex.org/W1517202054","https://openalex.org/W1523372075","https://openalex.org/W1576227399","https://openalex.org/W2023694213","https://openalex.org/W2029434926","https://openalex.org/W2043003570","https://openalex.org/W2049686551","https://openalex.org/W2055070149","https://openalex.org/W2055309977","https://openalex.org/W2099471712","https://openalex.org/W2100495367","https://openalex.org/W2102003408","https://openalex.org/W2111194146","https://openalex.org/W2111284386","https://openalex.org/W2115040572","https://openalex.org/W2120605154","https://openalex.org/W2129142580","https://openalex.org/W2135029798","https://openalex.org/W2156142001","https://openalex.org/W2156387975","https://openalex.org/W2165700458","https://openalex.org/W2184310502","https://openalex.org/W2274854232","https://openalex.org/W2282821441","https://openalex.org/W2293049663","https://openalex.org/W2294013337","https://openalex.org/W2295634712","https://openalex.org/W2338186431","https://openalex.org/W2395578248","https://openalex.org/W2396990910","https://openalex.org/W2401839215","https://openalex.org/W2402103843","https://openalex.org/W2403471241","https://openalex.org/W2404100688","https://openalex.org/W2405756170","https://openalex.org/W2406654659","https://openalex.org/W2407039802","https://openalex.org/W2419501139","https://openalex.org/W2473388484","https://openalex.org/W2517788403","https://openalex.org/W2519091744","https://openalex.org/W2577946330","https://openalex.org/W2593414223","https://openalex.org/W2666408839","https://openalex.org/W2748379347","https://openalex.org/W2963800509","https://openalex.org/W2964024144","https://openalex.org/W2964301388","https://openalex.org/W3037567775","https://openalex.org/W3123963976","https://openalex.org/W4233762729","https://openalex.org/W4293404332","https://openalex.org/W4320013936","https://openalex.org/W4395958265","https://openalex.org/W6607663849","https://openalex.org/W6629354409","https://openalex.org/W6631309588","https://openalex.org/W6675380101","https://openalex.org/W6676358011","https://openalex.org/W6680012447","https://openalex.org/W6682889407","https://openalex.org/W6697322189","https://openalex.org/W6711777497","https://openalex.org/W6712112783","https://openalex.org/W6712820016","https://openalex.org/W6712941293","https://openalex.org/W6713645886","https://openalex.org/W6714093102","https://openalex.org/W6717434760","https://openalex.org/W6730746255","https://openalex.org/W6732248266","https://openalex.org/W6734564793","https://openalex.org/W6779669310","https://openalex.org/W6864847698"],"related_works":["https://openalex.org/W4293320219","https://openalex.org/W2953246223","https://openalex.org/W4283584549","https://openalex.org/W2554314924","https://openalex.org/W4288256692","https://openalex.org/W2998859928","https://openalex.org/W4381885966","https://openalex.org/W2969399009","https://openalex.org/W4398186750","https://openalex.org/W3151498616"],"abstract_inverted_index":{"A":[0,59],"method":[1,151,176],"for":[2,124,166],"statistical":[3],"parametric":[4],"speech":[5,26,30,57,101,147,160,230],"synthesis":[6],"incorporating":[7,89],"generative":[8],"adversarial":[9,122],"networks":[10,18],"(GANs)":[11],"is":[12,32,48,94,134],"proposed.":[13],"Although":[14],"powerful":[15],"deep":[16],"neural":[17,68],"techniques":[19],"can":[20,177],"be":[21],"applied":[22],"to":[23,72,81,96,109,135],"artificially":[24],"synthesize":[25],"waveform,":[27],"the":[28,42,45,55,83,86,90,92,104,111,115,126,129,132,137,143,149,154,158,164,174,203,206,218,222,228],"synthetic":[29,229],"quality":[31,46],"low":[33],"compared":[34],"with":[35],"that":[36,173,213],"of":[37,41,66,114,131,196,205,208,226],"natural":[38,74,98,144,180],"speech.":[39],"One":[40],"issues":[43],"causing":[44],"degradation":[47],"an":[49,121],"oversmoothing":[50,155],"effect":[51,156,204],"often":[52],"observed":[53],"in":[54,62,224],"generated":[56,76,100,146,159],"parameters.":[58,161],"GAN":[60,216],"introduced":[61],"this":[63],"paper":[64],"consists":[65],"two":[67],"networks:":[69],"a":[70,79,214],"discriminator":[71,93],"distinguish":[73,97],"and":[75,78,99,120,145,168,171,183,211],"samples,":[77],"generator":[80],"deceive":[82],"discriminator.":[84,127],"In":[85],"proposed":[87,150,175],"framework":[88],"GANs,":[91,210],"trained":[95,108],"parameters,":[102,148],"while":[103],"acoustic":[105],"models":[106],"are":[107],"minimize":[110,136],"weighted":[112],"sum":[113],"conventional":[116,189],"minimum":[117,190],"generation":[118,191],"loss":[119,123],"deceiving":[125],"Since":[128],"objective":[130],"GANs":[133],"divergence":[138,207],"(i.e.,":[139],"distribution":[140],"difference)":[141],"between":[142],"effectively":[152],"alleviates":[153],"on":[157],"We":[162],"evaluated":[163],"effectiveness":[165],"text-to-speech":[167],"voice":[169],"conversion,":[170],"found":[172,212],"generate":[178],"more":[179],"spectral":[181],"parameters":[182],"F":[184],"<sub":[185],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[186],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">0</sub>":[187],"than":[188],"error":[192],"training":[193],"algorithm":[194],"regardless":[195],"its":[197],"hyperparameter":[198],"settings.":[199],"Furthermore,":[200],"we":[201],"investigated":[202],"various":[209],"Wasserstein":[215],"minimizing":[217],"Earth-Mover's":[219],"distance":[220],"works":[221],"best":[223],"terms":[225],"improving":[227],"quality.":[231]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":18},{"year":2022,"cited_by_count":24},{"year":2021,"cited_by_count":34},{"year":2020,"cited_by_count":50},{"year":2019,"cited_by_count":50},{"year":2018,"cited_by_count":27},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-02T09:04:35.204637","created_date":"2025-10-10T00:00:00"}
