{"id":"https://openalex.org/W4402830263","doi":"https://doi.org/10.1109/taslp.2024.3468005","title":"An Investigation of Time-Frequency Representation Discriminators for High-Fidelity Vocoders","display_name":"An Investigation of Time-Frequency Representation Discriminators for High-Fidelity Vocoders","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4402830263","doi":"https://doi.org/10.1109/taslp.2024.3468005"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3468005","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3468005","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032326279","display_name":"Yicheng Gu","orcid":"https://orcid.org/0009-0001-7819-5667"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yicheng Gu","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0001-7819-5667","affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Guangzhou, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100617727","display_name":"Xueyao Zhang","orcid":"https://orcid.org/0000-0003-2615-019X"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueyao Zhang","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Guangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Guangzhou, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009337933","display_name":"Liumeng Xue","orcid":"https://orcid.org/0000-0003-2815-8494"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liumeng Xue","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Guangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Guangzhou, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-9158-9401","affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Guangzhou, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102765381","display_name":"Zhizheng Wu","orcid":"https://orcid.org/0009-0001-1192-9857"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhizheng Wu","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0001-1192-9857","affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Guangzhou, China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5032326279"],"corresponding_institution_ids":["https://openalex.org/I4210116924"],"apc_list":null,"apc_paid":null,"fwci":0.3288,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.5451943,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"32","issue":null,"first_page":"4569","last_page":"4579"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9083999991416931,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9083999991416931,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.7027980089187622},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4998486042022705},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.41922321915626526},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.12591761350631714},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.07602179050445557}],"concepts":[{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.7027980089187622},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4998486042022705},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.41922321915626526},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.12591761350631714},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.07602179050445557},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3468005","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3468005","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7799999713897705,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G4327894984","display_name":null,"funder_award_id":"62376237","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W1582811089","https://openalex.org/W1963685724","https://openalex.org/W2006763798","https://openalex.org/W2015064056","https://openalex.org/W2020997493","https://openalex.org/W2042105302","https://openalex.org/W2067530554","https://openalex.org/W2152205330","https://openalex.org/W2191779130","https://openalex.org/W2471520273","https://openalex.org/W2590129515","https://openalex.org/W2884225676","https://openalex.org/W2929299742","https://openalex.org/W2936479322","https://openalex.org/W2962866891","https://openalex.org/W2963091184","https://openalex.org/W2963175743","https://openalex.org/W2963300588","https://openalex.org/W2972359262","https://openalex.org/W3015338123","https://openalex.org/W3034302232","https://openalex.org/W3081424945","https://openalex.org/W3096159803","https://openalex.org/W3158762648","https://openalex.org/W3196468212","https://openalex.org/W3197385343","https://openalex.org/W3198234802","https://openalex.org/W3204728450","https://openalex.org/W3206191467","https://openalex.org/W4224612669","https://openalex.org/W4285345683","https://openalex.org/W4296068763","https://openalex.org/W4307312879","https://openalex.org/W4307323391","https://openalex.org/W4372347654","https://openalex.org/W4377000449","https://openalex.org/W4386159913","https://openalex.org/W4391021724","https://openalex.org/W4392902768","https://openalex.org/W4392903389","https://openalex.org/W4392904136","https://openalex.org/W4392904361","https://openalex.org/W4392909842","https://openalex.org/W4392931580","https://openalex.org/W4399596931","https://openalex.org/W4399657485","https://openalex.org/W4400480473","https://openalex.org/W4401452051","https://openalex.org/W4402112619","https://openalex.org/W4402905495","https://openalex.org/W6688966550","https://openalex.org/W6695676441","https://openalex.org/W6727697161","https://openalex.org/W6748409065","https://openalex.org/W6753855596","https://openalex.org/W6757817989","https://openalex.org/W6767111847","https://openalex.org/W6771024825","https://openalex.org/W6778823374","https://openalex.org/W6779090866","https://openalex.org/W6783182287","https://openalex.org/W6785954764","https://openalex.org/W6796464841","https://openalex.org/W6802838302","https://openalex.org/W6838843145","https://openalex.org/W6843673214","https://openalex.org/W6846529734","https://openalex.org/W6851724922","https://openalex.org/W6853515095","https://openalex.org/W6854140664","https://openalex.org/W6859403842","https://openalex.org/W6862144568","https://openalex.org/W6917585676"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2381850946","https://openalex.org/W4380449851","https://openalex.org/W3125091513","https://openalex.org/W4318832338","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109"],"abstract_inverted_index":{"Generative":[0],"Adversarial":[1],"Network":[2],"(GAN)":[3],"based":[4],"vocoders":[5],"are":[6,40],"superior":[7],"in":[8,42,127,137],"both":[9,143],"inference":[10],"speed":[11],"and":[12,58,79,96,108,130,145,159,185],"synthesis":[13,175],"quality":[14,176],"when":[15],"reconstructing":[16],"an":[17,21],"audible":[18],"waveform":[19],"from":[20],"acoustic":[22],"representation.":[23],"This":[24],"study":[25],"focuses":[26],"on":[27,142],"improving":[28],"the":[29,149,156,174],"discriminator":[30,95],"for":[31,75,115,166],"GAN-based":[32,180],"vocoders.":[33],"Most":[34],"existing":[35],"Time-Frequency":[36,51],"Representation":[37],"(TFR)-based":[38],"discriminators":[39,161,171],"rooted":[41],"Short-Time":[43],"Fourier":[44],"Transform":[45,92,102],"(STFT),":[46],"which":[47],"owns":[48],"a":[49,59,88,97,111,123,133],"constant":[50],"(TF)":[52],"resolution,":[53],"linearly":[54],"scaled":[55],"center":[56],"frequencies,":[57],"fixed":[60],"decomposition":[61],"basis,":[62],"making":[63],"it":[64],"incompatible":[65],"with":[66],"signals":[67],"like":[68],"singing":[69,146],"voices":[70,147],"that":[71],"require":[72],"dynamic":[73,112],"attention":[74],"different":[76,80,116],"frequency":[77,117],"bands":[78],"time":[81],"intervals.":[82],"Motivated":[83],"by":[84],"that,":[85],"we":[86],"propose":[87],"Multi-Scale":[89,98],"Sub-Band":[90],"Constant-Q":[91],"CQT":[93,107,121],"(MS-SB-CQT)":[94],"Temporal-Compressed":[99],"Continuous":[100],"Wavelet":[101],"CWT":[103,109,131],"(MS-TC-CWT)":[104],"discriminator.":[105],"Both":[106],"have":[110],"TF":[113],"resolution":[114],"bands.":[118],"In":[119],"contrast,":[120],"has":[122,132],"better":[124,134,167],"modeling":[125,135],"ability":[126,136],"pitch":[128],"information,":[129],"short-time":[138],"transients.":[139],"Experiments":[140],"conducted":[141],"speech":[144],"confirm":[148],"effectiveness":[150],"of":[151,177],"our":[152],"proposed":[153,170],"discriminators.":[154],"Moreover,":[155],"STFT,":[157],"CQT,":[158],"CWT-based":[160],"can":[162,172],"be":[163],"used":[164],"jointly":[165],"performance.":[168],"The":[169],"boost":[173],"various":[178],"state-of-the-art":[179],"vocoders,":[181],"including":[182],"HiFi-GAN,":[183],"BigVGAN,":[184],"APNet.":[186]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
