{"id":"https://openalex.org/W2666408839","doi":"https://doi.org/10.1109/icassp.2017.7953090","title":"Generative adversarial network-based postfilter for statistical parametric speech synthesis","display_name":"Generative adversarial network-based postfilter for statistical parametric speech synthesis","publication_year":2017,"publication_date":"2017-03-01","ids":{"openalex":"https://openalex.org/W2666408839","doi":"https://doi.org/10.1109/icassp.2017.7953090","mag":"2666408839"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2017.7953090","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7953090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020693766","display_name":"Takuhiro Kaneko","orcid":"https://orcid.org/0009-0000-8016-5144"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takuhiro Kaneko","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001243214","display_name":"Hirokazu Kameoka","orcid":"https://orcid.org/0000-0003-3102-0162"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirokazu Kameoka","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079710814","display_name":"Nobukatsu Hojo","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nobukatsu Hojo","raw_affiliation_strings":["NTT Media Intelligence Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Media Intelligence Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068604686","display_name":"Yusuke Ijima","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yusuke Ijima","raw_affiliation_strings":["NTT Media Intelligence Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Media Intelligence Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112132273","display_name":"Kaoru Hiramatsu","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kaoru Hiramatsu","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061465935","display_name":"Kunio Kashino","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kunio Kashino","raw_affiliation_strings":["NTT Media Intelligence Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Media Intelligence Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5020693766"],"corresponding_institution_ids":["https://openalex.org/I2251713219"],"apc_list":null,"apc_paid":null,"fwci":18.1368,"has_fulltext":false,"cited_by_count":128,"citation_normalized_percentile":{"value":0.99373175,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4910","last_page":"4914"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7173705101013184},{"id":"https://openalex.org/keywords/smoothing","display_name":"Smoothing","score":0.7016645669937134},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.6959736347198486},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6350723505020142},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.6054885983467102},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5780797600746155},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.5448524951934814},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4289567470550537},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4248066842556},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32594698667526245},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2061060070991516},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.1104036271572113},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.09047111868858337},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07853040099143982}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7173705101013184},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.7016645669937134},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.6959736347198486},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6350723505020142},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.6054885983467102},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5780797600746155},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.5448524951934814},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4289567470550537},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4248066842556},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32594698667526245},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2061060070991516},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.1104036271572113},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.09047111868858337},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07853040099143982},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2017.7953090","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7953090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7099999785423279,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W68089216","https://openalex.org/W648143168","https://openalex.org/W1502723613","https://openalex.org/W1522301498","https://openalex.org/W1600722501","https://openalex.org/W1665214252","https://openalex.org/W1778816975","https://openalex.org/W1861150963","https://openalex.org/W1903029394","https://openalex.org/W1921523184","https://openalex.org/W1927394876","https://openalex.org/W1987992317","https://openalex.org/W2000513720","https://openalex.org/W2039800941","https://openalex.org/W2049036695","https://openalex.org/W2049686551","https://openalex.org/W2099057450","https://openalex.org/W2099471712","https://openalex.org/W2102003408","https://openalex.org/W2108674328","https://openalex.org/W2125389028","https://openalex.org/W2129142580","https://openalex.org/W2150658333","https://openalex.org/W2194775991","https://openalex.org/W2294797155","https://openalex.org/W2396043161","https://openalex.org/W2951523806","https://openalex.org/W2963684088","https://openalex.org/W2963857374","https://openalex.org/W2964121744","https://openalex.org/W2964167449","https://openalex.org/W4251158933","https://openalex.org/W4320013936","https://openalex.org/W6602768981","https://openalex.org/W6621378261","https://openalex.org/W6631190155","https://openalex.org/W6637242042","https://openalex.org/W6638023308","https://openalex.org/W6639125025","https://openalex.org/W6640185926","https://openalex.org/W6674887261","https://openalex.org/W6675380101","https://openalex.org/W6676044216","https://openalex.org/W6678815747","https://openalex.org/W6685352114","https://openalex.org/W6687506355","https://openalex.org/W6696843773","https://openalex.org/W7075637324"],"related_works":["https://openalex.org/W4280544492","https://openalex.org/W2953246223","https://openalex.org/W4293320219","https://openalex.org/W4283584549","https://openalex.org/W3110074278","https://openalex.org/W3176926761","https://openalex.org/W2544475605","https://openalex.org/W2539985974","https://openalex.org/W80665902","https://openalex.org/W2155528703"],"abstract_inverted_index":{"We":[0],"propose":[1,99],"a":[2,6,100,118,122,125],"postfilter":[3,102,169],"based":[4,60],"on":[5,31,61,88],"generative":[7],"adversarial":[8,140,143],"network":[9],"(GAN)":[10],"to":[11,66,77,131,138,148,155,189],"compensate":[12,171],"for":[13,172],"the":[14,32,39,45,69,79,92,95,105,109,113,129,135,139,146,150,167],"differences":[15,33],"between":[16],"natural":[17,190],"speech":[18,20,25,186],"and":[19,47,50,57,98,124,179],"synthesized":[21],"by":[22,35],"statistical":[23],"parametric":[24],"synthesis.":[26],"In":[27],"particular,":[28],"we":[29,86,116],"focus":[30,87],"caused":[34],"over-smoothing,":[36],"which":[37],"makes":[38],"sounds":[40],"muffled.":[41],"Over-smoothing":[42],"occurs":[43],"in":[44,54],"time":[46],"frequency":[48],"directions":[49],"is":[51,187],"highly":[52],"correlated":[53],"both":[55],"directions,":[56],"conventional":[58],"methods":[59],"heuristics":[62],"are":[63],"too":[64],"limited":[65],"cover":[67],"all":[68],"factors":[70],"(e.g.,":[71],"global":[72],"variance":[73],"was":[74],"designed":[75],"only":[76],"recover":[78],"dynamic":[80],"range).":[81],"To":[82,111],"solve":[83],"this":[84],"problem,":[85],"\u201cspectral":[89],"texture\u201d,":[90],"i.e.,":[91,154],"details":[93],"of":[94,121,162],"time-frequency":[96],"representation,":[97],"learning-based":[101],"that":[103,166,183],"captures":[104],"structures":[106,175],"directly":[107],"from":[108],"data.":[110],"estimate":[112],"true":[114,151],"distribution,":[115,153],"utilize":[117],"GAN":[119],"composed":[120],"generator":[123,130,147],"discriminator.":[126,141],"This":[127,142],"optimizes":[128],"produce":[132],"samples":[133],"imitating":[134],"dataset":[136],"according":[137],"process":[144],"encourages":[145],"fit":[149],"data":[152],"generate":[156],"realistic":[157],"spectral":[158,174],"texture.":[159],"Objective":[160],"evaluation":[161,181],"experimental":[163],"results":[164],"shows":[165,182],"GAN-based":[168],"can":[170],"detailed":[173],"including":[176],"modulation":[177],"spectrum,":[178],"subjective":[180],"its":[184],"generated":[185],"comparable":[188],"speech.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":24},{"year":2019,"cited_by_count":32},{"year":2018,"cited_by_count":24},{"year":2017,"cited_by_count":13}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
