{"id":"https://openalex.org/W4225311785","doi":"https://doi.org/10.1109/icassp43922.2022.9747419","title":"Architecture for Variable Bitrate Neural Speech Codec with Configurable Computation Complexity","display_name":"Architecture for Variable Bitrate Neural Speech Codec with Configurable Computation Complexity","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225311785","doi":"https://doi.org/10.1109/icassp43922.2022.9747419"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747419","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747419","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076520549","display_name":"Tejas Jayashankar","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tejas Jayashankar","raw_affiliation_strings":["Massachusetts Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022133847","display_name":"Thilo Koehler","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Thilo Koehler","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008449971","display_name":"Kaustubh Kalgaonkar","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Kaustubh Kalgaonkar","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085174046","display_name":"Zhiping Xiu","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Zhiping Xiu","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057239023","display_name":"Jilong Wu","orcid":"https://orcid.org/0009-0007-8000-347X"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Jilong Wu","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084847368","display_name":"Ju Lin","orcid":"https://orcid.org/0000-0002-6970-4247"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Ju Lin","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043514543","display_name":"Prabhav Agrawal","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Prabhav Agrawal","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102191169","display_name":"Qing He","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Qing He","raw_affiliation_strings":["Facebook AI"],"affiliations":[{"raw_affiliation_string":"Facebook AI","institution_ids":["https://openalex.org/I2252078561"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5076520549"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":0.3677,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.47740916,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"861","last_page":"865"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.852115273475647},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.8506516218185425},{"id":"https://openalex.org/keywords/adaptive-multi-rate-audio-codec","display_name":"Adaptive Multi-Rate audio codec","score":0.7815291881561279},{"id":"https://openalex.org/keywords/codec2","display_name":"Codec2","score":0.7419023513793945},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.7090675234794617},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6090260744094849},{"id":"https://openalex.org/keywords/psqm","display_name":"PSQM","score":0.5360679030418396},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5151225924491882},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.48814448714256287},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4761926233768463},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4618808925151825},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.1987629532814026},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.15332072973251343}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.852115273475647},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.8506516218185425},{"id":"https://openalex.org/C177067256","wikidata":"https://www.wikidata.org/wiki/Q4676210","display_name":"Adaptive Multi-Rate audio codec","level":4,"score":0.7815291881561279},{"id":"https://openalex.org/C75217168","wikidata":"https://www.wikidata.org/wiki/Q1105653","display_name":"Codec2","level":4,"score":0.7419023513793945},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.7090675234794617},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6090260744094849},{"id":"https://openalex.org/C108699837","wikidata":"https://www.wikidata.org/wiki/Q7120750","display_name":"PSQM","level":4,"score":0.5360679030418396},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5151225924491882},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.48814448714256287},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4761926233768463},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4618808925151825},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.1987629532814026},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.15332072973251343}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747419","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747419","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2020883660","https://openalex.org/W2120847449","https://openalex.org/W2151626637","https://openalex.org/W2519091744","https://openalex.org/W2752796333","https://openalex.org/W2908510526","https://openalex.org/W2935711438","https://openalex.org/W2963091184","https://openalex.org/W2963799213","https://openalex.org/W2970006822","https://openalex.org/W2971074500","https://openalex.org/W2972354707","https://openalex.org/W3008715487","https://openalex.org/W3092028330","https://openalex.org/W3096468295","https://openalex.org/W3163662330","https://openalex.org/W3186609711","https://openalex.org/W3197042120","https://openalex.org/W3215615641","https://openalex.org/W4205788663","https://openalex.org/W4298580827","https://openalex.org/W6631190155","https://openalex.org/W6639363673","https://openalex.org/W6748409065","https://openalex.org/W6757817989","https://openalex.org/W6762390287","https://openalex.org/W6762931180","https://openalex.org/W6767111847","https://openalex.org/W6774662120","https://openalex.org/W6783867762","https://openalex.org/W6798098866"],"related_works":["https://openalex.org/W2159307410","https://openalex.org/W2151333624","https://openalex.org/W1538113069","https://openalex.org/W1999835922","https://openalex.org/W1542588102","https://openalex.org/W1911859126","https://openalex.org/W2289505355","https://openalex.org/W10322252","https://openalex.org/W4224929672","https://openalex.org/W2020464095"],"abstract_inverted_index":{"Low":[0],"bitrate":[1],"speech":[2,12,34,59,109,130],"codecs":[3],"have":[4],"become":[5],"an":[6],"area":[7],"of":[8,70],"intense":[9],"research.":[10],"Traditional":[11],"codecs,":[13],"which":[14,36],"use":[15],"signal":[16],"processing":[17],"methods":[18],"to":[19,72,105],"encode":[20],"and":[21,75,81,93],"decode":[22],"speech,":[23],"often":[24],"suffer":[25],"from":[26],"quality":[27],"issues":[28],"at":[29,79,97],"low":[30],"bitrates.":[31,99],"A":[32],"neural":[33,40,58],"codec,":[35],"uses":[37,86,121],"a":[38,56,87,106,122,126],"deep":[39],"network":[41],"in":[42],"the":[43,113,118],"compression":[44],"pipeline,":[45],"can":[46,77],"help":[47],"alleviate":[48],"this":[49,52],"issue.":[50],"In":[51],"paper":[53],"we":[54],"present":[55],"new":[57],"codec":[60,85],"that:":[61],"1)":[62],"supports":[63,67],"variable":[64],"bitrates":[65],"2)":[66],"packet":[68],"losses":[69],"up":[71],"120":[73],"ms":[74],"3)":[76],"operate":[78],"low-compute":[80],"high-compute":[82],"modes.":[83],"Our":[84,132],"hierarchical":[88],"VQ-VAE":[89],"(HVQVAE)":[90],"for":[91,108,129],"encoding":[92],"decoding":[94],"spectral":[95],"features":[96,102],"different":[98],"The":[100],"decoded":[101],"are":[103],"fed":[104],"vocoder":[107,128],"synthesis.":[110,131],"Depending":[111],"upon":[112],"end":[114],"user\u2019s":[115],"computing":[116],"resources,":[117],"decoder":[119],"either":[120],"powerful":[123],"WaveRNN":[124,139],"or":[125],"parametric":[127],"experiments":[133],"demonstrate":[134],"that":[135],"our":[136],"HVQVAE":[137],"+":[138],"setup":[140],"achieves":[141],"high":[142],"audio":[143],"quality.":[144]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
