{"id":"https://openalex.org/W3162618422","doi":"https://doi.org/10.1109/icassp39728.2021.9413954","title":"Efficient Adversarial Audio Synthesis VIA Progressive Upsampling","display_name":"Efficient Adversarial Audio Synthesis VIA Progressive Upsampling","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3162618422","doi":"https://doi.org/10.1109/icassp39728.2021.9413954","mag":"3162618422"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043276507","display_name":"Youngwoo Cho","orcid":"https://orcid.org/0000-0003-2606-0602"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Youngwoo Cho","raw_affiliation_strings":["Graduate School of AI, KAIST, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Graduate School of AI, KAIST, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108254805","display_name":"Minwook Chang","orcid":"https://orcid.org/0000-0003-0525-052X"},"institutions":[{"id":"https://openalex.org/I4210135449","display_name":"NCSOFT (South Korea)","ror":"https://ror.org/03q4mza74","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210135449"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Minwook Chang","raw_affiliation_strings":["Game AI Lab, NCSOFT, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Game AI Lab, NCSOFT, Republic of Korea","institution_ids":["https://openalex.org/I4210135449"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100756237","display_name":"Sanghyeon Lee","orcid":"https://orcid.org/0009-0003-7060-6315"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sanghyeon Lee","raw_affiliation_strings":["Graduate School of AI, KAIST, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Graduate School of AI, KAIST, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058677320","display_name":"Hyoung-Woo Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyoungwoo Lee","raw_affiliation_strings":["Korea University, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Korea University, Republic of Korea","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022834069","display_name":"Gerard Jounghyun Kim","orcid":"https://orcid.org/0000-0001-9880-8021"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Gerard Jounghyun Kim","raw_affiliation_strings":["Korea University, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Korea University, Republic of Korea","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047912015","display_name":"Jaegul Choo","orcid":"https://orcid.org/0000-0003-1071-4835"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaegul Choo","raw_affiliation_strings":["Graduate School of AI, KAIST, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Graduate School of AI, KAIST, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5043276507"],"corresponding_institution_ids":["https://openalex.org/I157485424"],"apc_list":null,"apc_paid":null,"fwci":0.1524,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.41440162,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"3410","last_page":"3414"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/upsampling","display_name":"Upsampling","score":0.9173717498779297},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7633399367332458},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6454663276672363},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.6388071179389954},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5743036270141602},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.508272111415863},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4369242191314697},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3264060616493225},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.17984417080879211},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08549627661705017}],"concepts":[{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.9173717498779297},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7633399367332458},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6454663276672363},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.6388071179389954},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5743036270141602},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.508272111415863},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4369242191314697},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3264060616493225},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.17984417080879211},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08549627661705017},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.5299999713897705}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322093","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1510561091","https://openalex.org/W1522301498","https://openalex.org/W1987507179","https://openalex.org/W2090284173","https://openalex.org/W2099471712","https://openalex.org/W2120847449","https://openalex.org/W2152859600","https://openalex.org/W2785678896","https://openalex.org/W2797583228","https://openalex.org/W2894295011","https://openalex.org/W2901616798","https://openalex.org/W2910577860","https://openalex.org/W2919624000","https://openalex.org/W2962760235","https://openalex.org/W2962879692","https://openalex.org/W2962981281","https://openalex.org/W2963373786","https://openalex.org/W2963609956","https://openalex.org/W2963684088","https://openalex.org/W2963836885","https://openalex.org/W2964121744","https://openalex.org/W2964345931","https://openalex.org/W2970006822","https://openalex.org/W4294643831","https://openalex.org/W4295521014","https://openalex.org/W4297817572","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6647235038","https://openalex.org/W6685352114","https://openalex.org/W6718379498","https://openalex.org/W6735913928","https://openalex.org/W6736996214","https://openalex.org/W6745560452","https://openalex.org/W6748582592","https://openalex.org/W6750665317","https://openalex.org/W6755257315","https://openalex.org/W6756859573","https://openalex.org/W6758675244","https://openalex.org/W6760601182","https://openalex.org/W6767111847"],"related_works":["https://openalex.org/W2062399876","https://openalex.org/W2607795551","https://openalex.org/W3155117723","https://openalex.org/W1991429770","https://openalex.org/W1983892167","https://openalex.org/W2281134365","https://openalex.org/W4310746709","https://openalex.org/W4385574037","https://openalex.org/W4386075645","https://openalex.org/W4306309518"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,15,44,57,61,66,95],"novel":[4],"generative":[5],"model":[6,39,49],"called":[7,40],"PUGAN,":[8],"which":[9,42],"progressively":[10],"synthesizes":[11],"high-quality":[12],"audio":[13,51,86],"in":[14,60,91,102],"raw":[16],"waveform.":[17],"Progressive":[18],"upsampling":[19],"GAN":[20],"(PUGAN)":[21],"leverages":[22],"the":[23,85,105],"progressive":[24,62],"generation":[25],"of":[26,70,100,104],"higher-resolution":[27],"output":[28],"by":[29],"stacking":[30],"multiple":[31],"encoder-decoder":[32],"architectures.":[33],"Compared":[34],"to":[35,56,98],"an":[36],"existing":[37],"state-of-the-art":[38],"WaveGAN,":[41],"uses":[43],"single":[45],"decoder":[46],"architecture,":[47],"our":[48],"generates":[50],"signals":[52,87],"and":[53,108],"converts":[54],"them":[55],"higher":[58],"resolution":[59],"manner,":[63],"while":[64],"using":[65],"significantly":[67],"smaller":[68,74],"number":[69],"parameters,":[71],"e.g.,":[72],"3.17x":[73],"for":[75],"16":[76],"kHz":[77],"output,":[78],"than":[79],"WaveGAN.":[80],"Our":[81],"experiments":[82],"show":[83],"that":[84,99],"can":[88],"be":[89],"generated":[90],"real":[92],"time":[93],"with":[94],"comparable":[96],"quality":[97],"WaveGAN":[101],"terms":[103],"inception":[106],"scores":[107],"human":[109],"perception.":[110]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
