{"id":"https://openalex.org/W3097828251","doi":"https://doi.org/10.21437/interspeech.2020-1238","title":"VocGAN: A High-Fidelity Real-Time Vocoder with a Hierarchically-Nested Adversarial Network","display_name":"VocGAN: A High-Fidelity Real-Time Vocoder with a Hierarchically-Nested Adversarial Network","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3097828251","doi":"https://doi.org/10.21437/interspeech.2020-1238","mag":"3097828251"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-1238","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1238","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073217180","display_name":"Jinhyeok Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jinhyeok Yang","raw_affiliation_strings":["Speech AI Lab, NCSOFT, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Speech AI Lab, NCSOFT, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079205409","display_name":"Junmo Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Junmo Lee","raw_affiliation_strings":["Speech AI Lab, NCSOFT, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Speech AI Lab, NCSOFT, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010564936","display_name":"Young-Ik Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Youngik Kim","raw_affiliation_strings":["Speech AI Lab, NCSOFT, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Speech AI Lab, NCSOFT, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052468556","display_name":"Hoon Young Cho","orcid":"https://orcid.org/0000-0002-6850-6580"},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hoon-Young Cho","raw_affiliation_strings":["Speech AI Lab, NCSOFT, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Speech AI Lab, NCSOFT, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009028758","display_name":"Injung Kim","orcid":"https://orcid.org/0000-0003-4439-6097"},"institutions":[{"id":"https://openalex.org/I113825674","display_name":"Handong Global University","ror":"https://ror.org/00txhkt32","country_code":"KR","type":"education","lineage":["https://openalex.org/I113825674"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Injung Kim","raw_affiliation_strings":["School of CSEE, Handong Global University, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"School of CSEE, Handong Global University, Republic of Korea","institution_ids":["https://openalex.org/I113825674"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5073217180"],"corresponding_institution_ids":["https://openalex.org/I4210135449"],"apc_list":null,"apc_paid":null,"fwci":5.8445,"has_fulltext":false,"cited_by_count":60,"citation_normalized_percentile":{"value":0.96854512,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"200","last_page":"204"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9740999937057495,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9708999991416931,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7516446113586426},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7230522632598877},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.6562240123748779},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.45470529794692993},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.3551971912384033},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20534881949424744},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.17139014601707458},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09865057468414307},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.06781888008117676}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7516446113586426},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7230522632598877},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.6562240123748779},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.45470529794692993},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3551971912384033},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20534881949424744},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.17139014601707458},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09865057468414307},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.06781888008117676}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2020-1238","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1238","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6700000166893005}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W2284050935","https://openalex.org/W2502312327","https://openalex.org/W2519091744","https://openalex.org/W2593414223","https://openalex.org/W2749651610","https://openalex.org/W2946200149","https://openalex.org/W2963073614","https://openalex.org/W2963300588","https://openalex.org/W2963413689","https://openalex.org/W2963800363","https://openalex.org/W2963975282","https://openalex.org/W2964243274","https://openalex.org/W2970006822","https://openalex.org/W3015338123","https://openalex.org/W4289305009","https://openalex.org/W4294619240","https://openalex.org/W4297606427"],"related_works":["https://openalex.org/W2971455341","https://openalex.org/W2963258858","https://openalex.org/W4288343117","https://openalex.org/W3005226846","https://openalex.org/W2468514837","https://openalex.org/W4306789193","https://openalex.org/W3208312582","https://openalex.org/W4226067002","https://openalex.org/W4385964512","https://openalex.org/W4212877023"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,23,59,64,75,103,111,150],"novel":[3],"high-fidelity":[4,143],"real-time":[5],"neural":[6],"vocoder":[7],"called":[8],"VocGAN.A":[9],"recently":[10,141],"developed":[11,142],"GAN-based":[12],"vocoder,":[13,144],"MelGAN,":[14,45,116],"produces":[15,22],"speech":[16,98],"waveforms":[17,99],"in":[18,28,74,91,123],"real-time.However,":[19],"it":[20,47,117],"often":[21],"waveform":[24,61],"that":[25],"is":[26,40,146],"insufficient":[27],"quality":[29,51,122],"or":[30],"inconsistent":[31],"with":[32,115,132,137],"acoustic":[33,72],"characteristics":[34],"of":[35,54,71],"the":[36,50,55,80],"input":[37],"mel":[38],"spectrogram.VocGAN":[39],"nearly":[41],"as":[42,44],"fast":[43],"but":[46],"significantly":[48,120],"improves":[49],"and":[52,63,83,107,152],"consistency":[53],"output":[56],"waveform.VocGAN":[57],"applies":[58,79],"multi-scale":[60],"generator":[62],"hierarchically-nested":[65],"discriminator":[66],"to":[67],"learn":[68],"multiple":[69,124],"levels":[70],"properties":[73],"balanced":[76],"way.It":[77],"also":[78,118],"joint":[81],"conditional":[82],"unconditional":[84],"objective,":[85],"which":[86],"has":[87],"shown":[88],"successful":[89],"results":[90],"high-resolution":[92],"image":[93],"synthesis.In":[94],"experiments,":[95],"VocGAN":[96,145],"synthesizes":[97],"416.7x":[100],"faster":[101,109,148],"on":[102,110,149],"GTX":[104],"1080Ti":[105],"GPU":[106],"3.24x":[108],"CPU":[112,151],"than":[113],"realtime.Compared":[114],"exhibits":[119,153],"improved":[121],"evaluation":[125],"metrics":[126],"including":[127],"mean":[128],"opinion":[129],"score":[130],"(MOS)":[131],"minimal":[133],"additional":[134],"overhead.Additionally,":[135],"compared":[136],"Parallel":[138],"WaveGAN,":[139],"another":[140],"6.98x":[147],"higher":[154],"MOS.":[155]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":20},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
