{"id":"https://openalex.org/W4225273076","doi":"https://doi.org/10.1109/icassp43922.2022.9746698","title":"Vocbench: A Neural Vocoder Benchmark for Speech Synthesis","display_name":"Vocbench: A Neural Vocoder Benchmark for Speech Synthesis","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225273076","doi":"https://doi.org/10.1109/icassp43922.2022.9746698"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746698","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746698","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076974550","display_name":"Ehab A. AlBadawy","orcid":"https://orcid.org/0000-0003-3954-733X"},"institutions":[{"id":"https://openalex.org/I392282","display_name":"University at Albany, State University of New York","ror":"https://ror.org/012zs8222","country_code":"US","type":"education","lineage":["https://openalex.org/I392282"]},{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]},{"id":"https://openalex.org/I113508548","display_name":"Albany State University","ror":"https://ror.org/01vme4277","country_code":"US","type":"education","lineage":["https://openalex.org/I113508548"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ehab A. AlBadawy","raw_affiliation_strings":["University at Albany,State University of New York,USA","State University of New York, University at Albany, USA"],"affiliations":[{"raw_affiliation_string":"University at Albany,State University of New York,USA","institution_ids":["https://openalex.org/I113508548","https://openalex.org/I392282"]},{"raw_affiliation_string":"State University of New York, University at Albany, USA","institution_ids":["https://openalex.org/I392282","https://openalex.org/I113508548","https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011235394","display_name":"Andrew Gibiansky","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Gibiansky","raw_affiliation_strings":["Facebook AI,USA","Facebook AI, USA"],"affiliations":[{"raw_affiliation_string":"Facebook AI,USA","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook AI, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102191169","display_name":"Qing He","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qing He","raw_affiliation_strings":["Facebook AI,USA","Facebook AI, USA"],"affiliations":[{"raw_affiliation_string":"Facebook AI,USA","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook AI, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057239023","display_name":"Jilong Wu","orcid":"https://orcid.org/0009-0007-8000-347X"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jilong Wu","raw_affiliation_strings":["Facebook AI,USA","Facebook AI, USA"],"affiliations":[{"raw_affiliation_string":"Facebook AI,USA","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook AI, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012638803","display_name":"Ming\u2010Ching Chang","orcid":"https://orcid.org/0000-0001-9325-5341"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]},{"id":"https://openalex.org/I113508548","display_name":"Albany State University","ror":"https://ror.org/01vme4277","country_code":"US","type":"education","lineage":["https://openalex.org/I113508548"]},{"id":"https://openalex.org/I392282","display_name":"University at Albany, State University of New York","ror":"https://ror.org/012zs8222","country_code":"US","type":"education","lineage":["https://openalex.org/I392282"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ming-Ching Chang","raw_affiliation_strings":["University at Albany,State University of New York,USA","State University of New York, University at Albany, USA"],"affiliations":[{"raw_affiliation_string":"University at Albany,State University of New York,USA","institution_ids":["https://openalex.org/I113508548","https://openalex.org/I392282"]},{"raw_affiliation_string":"State University of New York, University at Albany, USA","institution_ids":["https://openalex.org/I392282","https://openalex.org/I113508548","https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023752172","display_name":"Siwei Lyu","orcid":"https://orcid.org/0000-0002-0992-685X"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]},{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siwei Lyu","raw_affiliation_strings":["University at Buffalo,State University of New York,USA","State University of New York, University at Buffalo, USA"],"affiliations":[{"raw_affiliation_string":"University at Buffalo,State University of New York,USA","institution_ids":["https://openalex.org/I63190737"]},{"raw_affiliation_string":"State University of New York, University at Buffalo, USA","institution_ids":["https://openalex.org/I63190737","https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5076974550"],"corresponding_institution_ids":["https://openalex.org/I113508548","https://openalex.org/I392282","https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":1.1482,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.79519796,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"881","last_page":"885"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8710918426513672},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.791948676109314},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7099316120147705},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6259989142417908},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.6150151491165161},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5198619961738586},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4941084682941437},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.423662006855011},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3739287257194519},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32799720764160156},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.05237764120101929}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8710918426513672},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.791948676109314},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7099316120147705},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6259989142417908},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.6150151491165161},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5198619961738586},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4941084682941437},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.423662006855011},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3739287257194519},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32799720764160156},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.05237764120101929},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746698","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746698","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1607803906","https://openalex.org/W2013035813","https://openalex.org/W2120847449","https://openalex.org/W2129069237","https://openalex.org/W2133665775","https://openalex.org/W2154920538","https://openalex.org/W2183923427","https://openalex.org/W2400274193","https://openalex.org/W2519091744","https://openalex.org/W2526050071","https://openalex.org/W2803229097","https://openalex.org/W2903739847","https://openalex.org/W2963091184","https://openalex.org/W2963539064","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2970006822","https://openalex.org/W2972359262","https://openalex.org/W2972882294","https://openalex.org/W3015338123","https://openalex.org/W3036167779","https://openalex.org/W3096037340","https://openalex.org/W3123097577","https://openalex.org/W3129651364","https://openalex.org/W3144035034","https://openalex.org/W4294562173","https://openalex.org/W4298580827","https://openalex.org/W4320013936","https://openalex.org/W6712612915","https://openalex.org/W6736996214","https://openalex.org/W6743481905","https://openalex.org/W6748409065","https://openalex.org/W6767111847","https://openalex.org/W6769767169","https://openalex.org/W6779823529","https://openalex.org/W6782760101","https://openalex.org/W6783182287","https://openalex.org/W6917585676","https://openalex.org/W6936113694"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2088854863","https://openalex.org/W4402568167","https://openalex.org/W3179495260","https://openalex.org/W1976719989","https://openalex.org/W3127543252","https://openalex.org/W2897924318","https://openalex.org/W2138997758"],"abstract_inverted_index":{"Neural":[0],"vocoders,":[1],"used":[2,18],"for":[3,110,117,155],"converting":[4],"the":[5,13,75,107,130,143,152],"spectral":[6],"representations":[7],"of":[8,77,132,151],"an":[9],"audio":[10],"signal":[11],"to":[12,43,52,61,86,128],"waveforms,":[14],"are":[15],"a":[16,71,83,92,97,123,136],"commonly":[17],"component":[19],"in":[20,91],"speech":[21],"synthesis":[22],"pipelines.":[23],"It":[24],"focuses":[25],"on":[26],"synthesizing":[27],"waveforms":[28],"from":[29],"low-dimensional":[30],"representation,":[31],"such":[32,45],"as":[33],"Mel-Spectrograms.":[34],"In":[35,102],"recent":[36],"years,":[37],"different":[38,88,137],"approaches":[39],"have":[40],"been":[41],"introduced":[42],"develop":[44],"vocoders.":[46,80,120],"However,":[47],"it":[48],"becomes":[49],"more":[50],"challenging":[51],"assess":[53],"these":[54],"new":[55],"vocoders":[56,90],"and":[57,114,125,149],"compare":[58,129],"their":[59],"performance":[60,76,131],"previous":[62],"ones.":[63],"To":[64],"address":[65],"this":[66],"problem,":[67],"we":[68,105],"present":[69],"VocBench,":[70],"framework":[72,144,159],"that":[73,95,142],"benchmark":[74],"state-of-the-art":[78],"neural":[79,89,119],"VocBench":[81,158],"uses":[82],"systematic":[84],"study":[85],"evaluate":[87],"shared":[93],"environment":[94],"enables":[96],"fair":[98],"comparison":[99],"between":[100],"them.":[101],"our":[103],"experiments,":[104],"use":[106],"same":[108],"setup":[109],"datasets,":[111],"training":[112],"pipeline,":[113],"evaluation":[115,127],"metrics":[116],"all":[118],"We":[121],"perform":[122],"subjective":[124],"objective":[126],"each":[133,156],"vocoder":[134],"along":[135],"axis.":[138],"Our":[139],"results":[140],"demonstrate":[141],"can":[145],"show":[146],"competitive":[147],"efficacy":[148],"quality":[150],"synthesized":[153],"samples":[154],"vocoder.":[157],"is":[160],"available":[161],"at":[162],"https://github.com/facebookresearch/vocoder-benchmark.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
