{"id":"https://openalex.org/W2293049663","doi":"https://doi.org/10.1109/taslp.2016.2522655","title":"Postfilters to Modify the Modulation Spectrum for Statistical Parametric Speech Synthesis","display_name":"Postfilters to Modify the Modulation Spectrum for Statistical Parametric Speech Synthesis","publication_year":2016,"publication_date":"2016-01-27","ids":{"openalex":"https://openalex.org/W2293049663","doi":"https://doi.org/10.1109/taslp.2016.2522655","mag":"2293049663"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2016.2522655","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2016.2522655","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013050263","display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Shinnosuke Takamichi","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078330211","display_name":"Tomoki Toda","orcid":"https://orcid.org/0000-0001-8146-1279"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoki Toda","raw_affiliation_strings":["Information Technology Center, Nagoya University, Nagoya, Japan"],"affiliations":[{"raw_affiliation_string":"Information Technology Center, Nagoya University, Nagoya, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107337645","display_name":"Alan W. Black","orcid":"https://orcid.org/0000-0001-8820-8831"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alan W. Black","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000692949","display_name":"Graham Neubig","orcid":null},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Graham Neubig","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040108974","display_name":"Sakriani Sakti","orcid":"https://orcid.org/0000-0001-5509-8963"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sakriani Sakti","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020994673","display_name":"Satoshi Nakamura","orcid":"https://orcid.org/0000-0001-6956-3803"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Nakamura","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5013050263"],"corresponding_institution_ids":["https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":15.4252,"has_fulltext":false,"cited_by_count":74,"citation_normalized_percentile":{"value":0.98951826,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"24","issue":"4","first_page":"755","last_page":"767"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.7918875217437744},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7362802028656006},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7029894590377808},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.6553855538368225},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.6388264894485474},{"id":"https://openalex.org/keywords/smoothing","display_name":"Smoothing","score":0.6191526651382446},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5611927509307861},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4143872857093811},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3696834444999695},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16093367338180542},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07521352171897888}],"concepts":[{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.7918875217437744},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7362802028656006},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7029894590377808},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.6553855538368225},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.6388264894485474},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.6191526651382446},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5611927509307861},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4143872857093811},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3696834444999695},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16093367338180542},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07521352171897888},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2016.2522655","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2016.2522655","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":80,"referenced_works":["https://openalex.org/W187033940","https://openalex.org/W202879582","https://openalex.org/W1480055486","https://openalex.org/W1482298176","https://openalex.org/W1502723613","https://openalex.org/W1523372075","https://openalex.org/W1563460361","https://openalex.org/W1570629387","https://openalex.org/W1576227399","https://openalex.org/W1600722501","https://openalex.org/W1778816975","https://openalex.org/W1935012542","https://openalex.org/W1963627370","https://openalex.org/W1963710239","https://openalex.org/W1964420823","https://openalex.org/W1984905644","https://openalex.org/W1987992317","https://openalex.org/W1990383786","https://openalex.org/W1990505856","https://openalex.org/W1990967464","https://openalex.org/W1992228106","https://openalex.org/W1995332880","https://openalex.org/W1999686891","https://openalex.org/W2000513720","https://openalex.org/W2005438552","https://openalex.org/W2005768155","https://openalex.org/W2029434926","https://openalex.org/W2031321541","https://openalex.org/W2039800941","https://openalex.org/W2043003570","https://openalex.org/W2049686551","https://openalex.org/W2059610484","https://openalex.org/W2060554399","https://openalex.org/W2072473772","https://openalex.org/W2075012882","https://openalex.org/W2100140000","https://openalex.org/W2100649345","https://openalex.org/W2103253424","https://openalex.org/W2106792148","https://openalex.org/W2109444541","https://openalex.org/W2111284386","https://openalex.org/W2115040572","https://openalex.org/W2120605154","https://openalex.org/W2129142580","https://openalex.org/W2134202996","https://openalex.org/W2142183264","https://openalex.org/W2150658333","https://openalex.org/W2154920538","https://openalex.org/W2156142001","https://openalex.org/W2170580867","https://openalex.org/W2187234408","https://openalex.org/W2242005248","https://openalex.org/W2283817422","https://openalex.org/W2294351487","https://openalex.org/W2395578248","https://openalex.org/W2397642903","https://openalex.org/W2397893613","https://openalex.org/W2406654659","https://openalex.org/W2407039802","https://openalex.org/W2551677481","https://openalex.org/W2917245127","https://openalex.org/W2943553228","https://openalex.org/W6607663849","https://openalex.org/W6608197479","https://openalex.org/W6628615481","https://openalex.org/W6629248834","https://openalex.org/W6631309588","https://openalex.org/W6638023308","https://openalex.org/W6641076745","https://openalex.org/W6677973343","https://openalex.org/W6690391285","https://openalex.org/W6695760510","https://openalex.org/W6696767757","https://openalex.org/W6711777497","https://openalex.org/W6712459571","https://openalex.org/W6712788173","https://openalex.org/W6713535213","https://openalex.org/W6714093102","https://openalex.org/W6730233244","https://openalex.org/W7075637324"],"related_works":["https://openalex.org/W2904846757","https://openalex.org/W175280642","https://openalex.org/W2688184458","https://openalex.org/W642007152","https://openalex.org/W4253660971","https://openalex.org/W2401827384","https://openalex.org/W2355290951","https://openalex.org/W2164147372","https://openalex.org/W2550171623","https://openalex.org/W596245619"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"novel":[3],"approaches":[4],"based":[5,186],"on":[6,187],"modulation":[7],"spectrum":[8],"(MS)":[9],"for":[10],"high-quality":[11],"statistical":[12,24,188],"parametric":[13,25,189],"speech":[14,26,33,37,48,74,101,109,119,122,137,171,190,291],"synthesis,":[15,34],"including":[16],"text-to-speech":[17],"(TTS)":[18],"and":[19,91,120,231,233,261,293,310],"voice":[20],"conversion":[21],"(VC).":[22],"Although":[23],"synthesis":[27,49,284],"offers":[28],"various":[29,184],"advantages":[30],"over":[31],"concatenative":[32,47],"the":[35,51,58,62,66,72,88,92,96,99,114,132,135,147,156,167,198,202,219,246,254,259,268,276,295],"synthetic":[36,121,170,290],"quality":[38,52,63,115,251],"is":[39,65,80,123],"still":[40,124],"not":[41,302],"as":[42,44,82,140],"good":[43],"that":[45,174,244],"of":[46,53,57,94,98,107,134,169,175,197,204,218,281],"or":[50,161],"natural":[54,108,118,176],"speech.":[55,177],"One":[56],"biggest":[59],"issues":[60],"causing":[61],"degradation":[64],"over-smoothing":[67,89,148],"effect":[68],"often":[69],"observed":[70],"in":[71,201,225,272,289,301],"generated":[73,100,136],"parameter":[75,102,138],"trajectories.":[76],"Global":[77],"variance":[78],"(GV)":[79],"known":[81],"a":[83,141],"feature":[84,143],"well":[85],"correlated":[86],"with":[87],"effect,":[90],"effectiveness":[93,217],"keeping":[95],"GV":[97],"trajectories":[103,139],"similar":[104],"to":[105,144,154,165,173,183,253,267],"those":[106],"has":[110],"been":[111],"confirmed.":[112],"However,":[113],"gap":[116],"between":[117],"large.":[125],"In":[126],"this":[127],"paper,":[128],"we":[129,151],"propose":[130,152],"using":[131],"MS":[133,157,168],"new":[142],"effectively":[145],"quantify":[146],"effect.":[149],"Moreover,":[150],"postfilters":[153,180,221,297],"modify":[155],"utterance":[158,160],"by":[159,163,265],"segment":[162,164],"make":[166],"close":[172],"The":[178,240],"proposed":[179,199,220,247,277,296],"are":[181,222,298],"applicable":[182],"synthesizers":[185],"synthesis.":[191],"We":[192],"first":[193],"perform":[194],"an":[195],"evaluation":[196],"method":[200],"framework":[203],"hidden":[205],"Markov":[206],"model":[207,228],"(HMM)-based":[208],"TTS,":[209,274],"examining":[210],"its":[211],"properties":[212],"from":[213],"different":[214],"perspectives.":[215],"Furthermore,":[216],"also":[223,285,299,307],"evaluated":[224],"Gaussian":[226],"mixture":[227],"(GMM)-based":[229],"VC":[230,309],"classification":[232],"regression":[234],"trees":[235],"(CART)-based":[236],"TTS":[237,305],"(a.k.a.,":[238],"CLUSTERGEN).":[239],"experimental":[241],"results":[242],"demonstrate":[243],"1)":[245],"utterance-level":[248],"postfilter":[249,279],"achieves":[250],"comparable":[252],"conventional":[255],"generation":[256,270],"algorithm":[257,271],"considering":[258],"GV,":[260],"yields":[262,286],"significant":[263,287],"improvements":[264,288],"applying":[266],"GV-based":[269],"HMM-based":[273,304],"2)":[275],"segment-level":[278],"capable":[280],"achieving":[282],"low-delay":[283],"quality,":[292],"3)":[294],"effective":[300],"only":[303],"but":[306],"GMM-based":[308],"CLUSTERGEN.":[311]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":10},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
