{"id":"https://openalex.org/W4401212885","doi":"https://doi.org/10.1145/3653876.3653893","title":"DeepGAN: A fast and high-quality time-domain-based neural vocoder for low-resource scenarios","display_name":"DeepGAN: A fast and high-quality time-domain-based neural vocoder for low-resource scenarios","publication_year":2024,"publication_date":"2024-02-23","ids":{"openalex":"https://openalex.org/W4401212885","doi":"https://doi.org/10.1145/3653876.3653893"},"language":"en","primary_location":{"id":"doi:10.1145/3653876.3653893","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3653876.3653893","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3653876.3653893?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 8th International Conference on Digital Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3653876.3653893?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026358075","display_name":"Yuan Jiang","orcid":"https://orcid.org/0009-0006-2755-9375"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuan Jiang","raw_affiliation_strings":["University of Science and Technology of China, China"],"raw_orcid":"https://orcid.org/0009-0006-2755-9375","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022079981","display_name":"Shun Bao","orcid":"https://orcid.org/0009-0002-6386-1345"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shun Bao","raw_affiliation_strings":["iFLYTEK Research, iFLYTEK Co., Ltd., China"],"raw_orcid":"https://orcid.org/0009-0002-6386-1345","affiliations":[{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010426665","display_name":"Yajun Hu","orcid":"https://orcid.org/0009-0007-9626-456X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yajun Hu","raw_affiliation_strings":["iFLYTEK Research, iFLYTEK Co., Ltd., China"],"raw_orcid":"https://orcid.org/0009-0007-9626-456X","affiliations":[{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018120069","display_name":"Lijuan Liu","orcid":"https://orcid.org/0000-0001-6126-1077"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lijuan Liu","raw_affiliation_strings":["iFLYTEK Research, iFLYTEK Co., Ltd., China"],"raw_orcid":"https://orcid.org/0000-0001-6126-1077","affiliations":[{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057641339","display_name":"Guoping Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guoping Hu","raw_affiliation_strings":["iFLYTEK Research, iFLYTEK Co., Ltd., China"],"raw_orcid":"https://orcid.org/0009-0006-1038-0336","affiliations":[{"raw_affiliation_string":"iFLYTEK Research, iFLYTEK Co., Ltd., China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045907056","display_name":"Yang Ai","orcid":"https://orcid.org/0000-0001-6668-022X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Ai","raw_affiliation_strings":["University of Science and Technology of China, China"],"raw_orcid":"https://orcid.org/0000-0001-6668-022X","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Ling","raw_affiliation_strings":["University of Science and Technology of China, China"],"raw_orcid":"https://orcid.org/0000-0001-7853-5273","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5026358075"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12495361,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"112","last_page":"117"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7862259149551392},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4813447892665863},{"id":"https://openalex.org/keywords/time-domain","display_name":"Time domain","score":0.4656079113483429},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4493817985057831},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.38369521498680115},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13508382439613342},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.053441524505615234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7862259149551392},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4813447892665863},{"id":"https://openalex.org/C103824480","wikidata":"https://www.wikidata.org/wiki/Q185889","display_name":"Time domain","level":2,"score":0.4656079113483429},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4493817985057831},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38369521498680115},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13508382439613342},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.053441524505615234},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3653876.3653893","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3653876.3653893","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3653876.3653893?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 8th International Conference on Digital Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3653876.3653893","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3653876.3653893","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3653876.3653893?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 8th International Conference on Digital Signal Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4401212885.pdf","grobid_xml":"https://content.openalex.org/works/W4401212885.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W2593414223","https://openalex.org/W2889329491","https://openalex.org/W2972359262","https://openalex.org/W3015338123","https://openalex.org/W3096442195","https://openalex.org/W3097828251","https://openalex.org/W3150572638","https://openalex.org/W3161172673","https://openalex.org/W3161236344","https://openalex.org/W3197273793","https://openalex.org/W4312361963","https://openalex.org/W4372262501"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,14,29,135,140],"neural":[3],"vocoders":[4,20,114],"have":[5],"primarily":[6],"relied":[7],"on":[8],"generative":[9],"adversarial":[10],"networks":[11],"(GANs)":[12],"operating":[13],"the":[15,73,77,93,96,122,145],"time":[16],"domain.":[17],"However,":[18],"these":[19],"are":[21],"parameter-heavy":[22],"and":[23,45,84,101,118],"computationally":[24],"expensive,":[25],"limiting":[26],"their":[27],"use":[28],"resource-constrained":[30],"environments":[31],"such":[32],"as":[33,72],"embedded":[34],"devices.":[35],"Depthwise":[36],"separable":[37,70],"convolution,":[38],"known":[39],"for":[40,115],"its":[41],"lower":[42],"parameter":[43,123],"count":[44,124],"reduced":[46],"computational":[47],"costs,":[48],"can":[49],"be":[50],"employed":[51],"to":[52,63,91,112],"construct":[53],"lightweight":[54,87],"networks.":[55],"In":[56],"this":[57],"paper,":[58],"we":[59],"introduce":[60],"an":[61,136],"extension":[62],"HiFi-GAN,":[64,133],"named":[65],"DeepGAN,":[66],"which":[67],"utilizes":[68],"depthwise":[69],"convolution":[71],"primary":[74],"unit":[75],"within":[76],"network,":[78],"introduces":[79],"a":[80,86],"novel":[81],"upsample":[82],"module,":[83],"incorporates":[85],"excitation":[88],"generation":[89,141],"network":[90],"enhance":[92],"quality":[94],"of":[95,125,130,132],"generated":[97],"speech.":[98],"Both":[99],"objective":[100],"subjective":[102],"evaluations":[103],"demonstrate":[104],"that":[105,131],"our":[106],"proposed":[107],"DeepGAN":[108,126],"achieves":[109],"comparable":[110],"results":[111],"competing":[113],"both":[116],"seen":[117],"unseen":[119],"speakers.":[120],"Notably,":[121],"is":[127],"only":[128],"1/7":[129],"resulting":[134],"approximately":[137],"sixfold":[138],"improvement":[139],"speed,":[142],"while":[143],"maintaining":[144],"synthesized":[146],"speech":[147],"quality.":[148]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
