{"id":"https://openalex.org/W2746654391","doi":"https://doi.org/10.21437/interspeech.2017-962","title":"Generative Adversarial Network-Based Postfilter for STFT Spectrograms","display_name":"Generative Adversarial Network-Based Postfilter for STFT Spectrograms","publication_year":2017,"publication_date":"2017-08-16","ids":{"openalex":"https://openalex.org/W2746654391","doi":"https://doi.org/10.21437/interspeech.2017-962","mag":"2746654391"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2017-962","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-962","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.pure.ed.ac.uk/ws/files/41251122/962_file_Paper.pdf.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020693766","display_name":"Takuhiro Kaneko","orcid":"https://orcid.org/0009-0000-8016-5144"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takuhiro Kaneko","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062895056","display_name":"Shinji Takaki","orcid":"https://orcid.org/0000-0001-7294-7699"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinji Takaki","raw_affiliation_strings":["National Institute of Informatics, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001243214","display_name":"Hirokazu Kameoka","orcid":"https://orcid.org/0000-0003-3102-0162"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirokazu Kameoka","raw_affiliation_strings":["NTT Communication Science Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007639385","display_name":"Junichi Yamagishi","orcid":"https://orcid.org/0000-0003-2752-3955"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Junichi Yamagishi","raw_affiliation_strings":["National Institute of Informatics, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Japan","institution_ids":["https://openalex.org/I184597095"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5020693766"],"corresponding_institution_ids":["https://openalex.org/I2251713219"],"apc_list":null,"apc_paid":null,"fwci":3.8827,"has_fulltext":true,"cited_by_count":72,"citation_normalized_percentile":{"value":0.96249668,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3389","last_page":"3393"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9017000198364258,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9017000198364258,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9117993116378784},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6969853043556213},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.6721161007881165},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6118264198303223},{"id":"https://openalex.org/keywords/short-time-fourier-transform","display_name":"Short-time Fourier transform","score":0.5424855351448059},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5424422025680542},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5386241674423218},{"id":"https://openalex.org/keywords/generative-adversarial-network","display_name":"Generative adversarial network","score":0.4997220039367676},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.45025575160980225},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.20286279916763306},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.1600399911403656},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14952021837234497},{"id":"https://openalex.org/keywords/fourier-analysis","display_name":"Fourier analysis","score":0.08229878544807434}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9117993116378784},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6969853043556213},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.6721161007881165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6118264198303223},{"id":"https://openalex.org/C166386157","wikidata":"https://www.wikidata.org/wiki/Q1477735","display_name":"Short-time Fourier transform","level":4,"score":0.5424855351448059},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5424422025680542},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5386241674423218},{"id":"https://openalex.org/C2988773926","wikidata":"https://www.wikidata.org/wiki/Q25104379","display_name":"Generative adversarial network","level":3,"score":0.4997220039367676},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.45025575160980225},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.20286279916763306},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.1600399911403656},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14952021837234497},{"id":"https://openalex.org/C203024314","wikidata":"https://www.wikidata.org/wiki/Q1365258","display_name":"Fourier analysis","level":3,"score":0.08229878544807434},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2017-962","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-962","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:publications/45771576-e61c-4ddd-8145-3dda8ec9251c","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.11820/45771576-e61c-4ddd-8145-3dda8ec9251c","pdf_url":"https://www.pure.ed.ac.uk/ws/files/41251122/962_file_Paper.pdf.pdf","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:pure.ed.ac.uk:publications/45771576-e61c-4ddd-8145-3dda8ec9251c","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/45771576-e61c-4ddd-8145-3dda8ec9251c","pdf_url":"https://www.pure.ed.ac.uk/ws/files/41251122/962_file_Paper.pdf.pdf","source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Kaneko, T, Takaki, S, Kameoka, H & Yamagishi, J 2017, Generative Adversarial Network-based Postfilter for STFT Spectrograms. in Proceedings Interspeech 2017. Interspeech, International Speech Communication Association, pp. 3389-3393, Interspeech 2017, Stockholm, Sweden, 20/08/17. https://doi.org/10.21437/Interspeech.2017-962","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/45771576-e61c-4ddd-8145-3dda8ec9251c","is_oa":true,"landing_page_url":"https://hdl.handle.net/20.500.11820/45771576-e61c-4ddd-8145-3dda8ec9251c","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Kaneko, T, Takaki, S, Kameoka, H & Yamagishi, J 2017, Generative Adversarial Network-based Postfilter for STFT Spectrograms. in Proceedings Interspeech 2017. Interspeech, International Speech Communication Association, pp. 3389-3393, Interspeech 2017, Stockholm, Sweden, 20/08/17. https://doi.org/10.21437/Interspeech.2017-962","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:publications/45771576-e61c-4ddd-8145-3dda8ec9251c","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.11820/45771576-e61c-4ddd-8145-3dda8ec9251c","pdf_url":"https://www.pure.ed.ac.uk/ws/files/41251122/962_file_Paper.pdf.pdf","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2327824500","display_name":"Development of augmented speech production techniques based on combination of statistical approaches and speech production modeling approaches","funder_award_id":"26280060","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G2340638847","display_name":null,"funder_award_id":"ACT-I","funder_id":"https://openalex.org/F4320334789","funder_display_name":"Japan Science and Technology Agency"},{"id":"https://openalex.org/G3349790956","display_name":"Direct modeling of speech waveform using a DNN for text-to-speech synthesis","funder_award_id":"16K16096","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4081503303","display_name":"\u96c6\u56e3\u306e\u885b\u751f\u7ba1\u7406\u306b\u95a2\u3059\u308b\u30c1\u30a7\u30c3\u30af\u30ea\u30b9\u30c8\u306e\u7814\u7a76(\u7d992\u5e74)","funder_award_id":"73010","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4789093758","display_name":null,"funder_award_id":"MEXT/JSPS","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5330081643","display_name":"\u8c61\u8c61\u6307\u5c0e\u306b\u304a\u3051\u308b\u57fa\u672c\u7684\u6982\u5ff5\u306e\u5fb9\u5e95\u3068\u305d\u306e\u6307\u5c0e\u6cd5\u2015\u6c34\u306e\u5faa\u74b0\u3092\u67f1\u3068\u3057\u305f\u300c\u6e7f\u5ea6\u306e\u5909\u5316\u300d\u306e\u6307\u5c0e\u4f8b\u2015","funder_award_id":"16096","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G6368615416","display_name":"Acoustic scene analysis based on time-space acoustic signal modeling and machine learning","funder_award_id":"26730100","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7689325265","display_name":"\u901a\u5b66\u7a2e\u985e\u3068\u8d70\u529b\u3068\u306e\u95a2\u4fc2\u53ca\u30732,3\u306e\u4e8b\u9805\u3068\u5b66\u529b\u3068\u306e\u95a2\u4fc2\u8abf\u67fb\u7d71\u8a08","funder_award_id":"28006","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7771753304","display_name":"\u7d30\u80de\u8a3a\u5857\u682a\u7d30\u80de\u306e\u5149\u9855\u7684\u304a\u3088\u3073\u96fb\u9855\u7684\u89b3\u5bdf","funder_award_id":"267301","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G8991044360","display_name":null,"funder_award_id":"16K16096","funder_id":"https://openalex.org/F4320320912","funder_display_name":"Ministry of Education, Culture, Sports, Science and Technology"}],"funders":[{"id":"https://openalex.org/F4320320912","display_name":"Ministry of Education, Culture, Sports, Science and Technology","ror":"https://ror.org/048rj2z13"},{"id":"https://openalex.org/F4320325763","display_name":"Telecommunications Advancement Foundation","ror":"https://ror.org/05y77zf79"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320334789","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W2746654391.pdf"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1502723613","https://openalex.org/W1504438288","https://openalex.org/W1522301498","https://openalex.org/W1665214252","https://openalex.org/W1778816975","https://openalex.org/W1836465849","https://openalex.org/W1921523184","https://openalex.org/W1927394876","https://openalex.org/W1987992317","https://openalex.org/W2000513720","https://openalex.org/W2102003408","https://openalex.org/W2111284386","https://openalex.org/W2120847449","https://openalex.org/W2125389028","https://openalex.org/W2135029798","https://openalex.org/W2149942017","https://openalex.org/W2194775991","https://openalex.org/W2395849284","https://openalex.org/W2396043161","https://openalex.org/W2666408839","https://openalex.org/W2749881488","https://openalex.org/W2951523806","https://openalex.org/W4308909683","https://openalex.org/W4320013936"],"related_works":["https://openalex.org/W2120540196","https://openalex.org/W3095343173","https://openalex.org/W2381036744","https://openalex.org/W2288135719","https://openalex.org/W2533590149","https://openalex.org/W2901989338","https://openalex.org/W82005754","https://openalex.org/W3210733254","https://openalex.org/W4288265599","https://openalex.org/W3165634890"],"abstract_inverted_index":{"We":[0,126,152],"propose":[1,79],"a":[2,80,106,123,158],"learning-based":[3],"postfilter":[4,85,103,140,156,168],"to":[5,46,64,90,157,172],"reconstruct":[6,73],"the":[7,29,51,60,65,92,118,129,138,142,148,174,183],"high-fidelity":[8],"spectral":[9],"texture":[10],"in":[11,96,182],"short-term":[12],"Fourier":[13],"transform":[14],"(STFT)":[15],"spectrograms.":[16],"In":[17,40],"speech-processing":[18],"systems,":[19],"such":[20,116],"as":[21,36,117],"speech":[22,27],"synthesis,":[23],"voice":[24],"conversion,":[25],"and":[26,72,145,178],"enhancement,":[28],"STFT":[30,185],"spectrograms":[31,130],"have":[32],"been":[33],"widely":[34],"used":[35,171],"key":[37],"acoustic":[38],"representations.":[39],"these":[41,70],"tasks,":[42],"we":[43,78,121],"normally":[44],"need":[45],"precisely":[47],"generate":[48],"or":[49],"predict":[50],"representations":[52],"from":[53],"inputs;":[54],"however,":[55],"generated":[56],"spectra":[57,74],"typically":[58],"lack":[59],"fine":[61],"structures":[62],"close":[63],"true":[66,93],"data.":[67],"To":[68],"overcome":[69],"limitations":[71],"having":[75],"finer":[76],"structures,":[77],"generative":[81],"adversarial":[82,97],"network":[83],"(GAN)-based":[84],"that":[86,105,165],"is":[87,104],"implicitly":[88],"optimized":[89],"match":[91],"feature":[94],"distribution":[95],"learning.":[98],"The":[99,162],"challenge":[100],"with":[101,135,150],"this":[102],"GAN":[107],"cannot":[108],"be":[109,170],"easily":[110],"trained":[111],"for":[112,141],"very":[113],"high-dimensional":[114],"data":[115],"STFT.":[119],"Therefore,":[120],"introduce":[122],"divide-and-concatenate":[124],"strategy.":[125],"first":[127],"divide":[128],"into":[131],"multiple":[132],"frequency":[133],"bands":[134,149],"overlap,":[136],"train":[137],"GAN-based":[139],"individual":[143],"bands,":[144],"finally":[146],"connect":[147],"overlap.":[151],"applied":[153],"our":[154,166],"proposed":[155,167],"DNN-based":[159],"speech-synthesis":[160],"task.":[161],"results":[163],"show":[164],"can":[169],"reduce":[173],"gap":[175],"between":[176],"synthesized":[177],"target":[179],"spectra,":[180],"even":[181],"highdimensional":[184],"domain.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":18},{"year":2018,"cited_by_count":12},{"year":2017,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
