{"id":"https://openalex.org/W4372270198","doi":"https://doi.org/10.1109/icassp49357.2023.10096509","title":"Audiodec: An Open-Source Streaming High-Fidelity Neural Audio Codec","display_name":"Audiodec: An Open-Source Streaming High-Fidelity Neural Audio Codec","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372270198","doi":"https://doi.org/10.1109/icassp49357.2023.10096509"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096509","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096509","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2305.16608","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003177601","display_name":"Yi-Chiao Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yi-Chiao Wu","raw_affiliation_strings":["Meta Reality Labs Research,USA","Meta Reality Labs Research, USA"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research,USA","institution_ids":["https://openalex.org/I4210128585"]},{"raw_affiliation_string":"Meta Reality Labs Research, USA","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072548521","display_name":"Israel D. Gebru","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Israel D. Gebru","raw_affiliation_strings":["Meta Reality Labs Research,USA","Meta Reality Labs Research, USA"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research,USA","institution_ids":["https://openalex.org/I4210128585"]},{"raw_affiliation_string":"Meta Reality Labs Research, USA","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082706737","display_name":"Dejan Markovi\u0107","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dejan Markovi\u0107","raw_affiliation_strings":["Meta Reality Labs Research,USA","Meta Reality Labs Research, USA"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research,USA","institution_ids":["https://openalex.org/I4210128585"]},{"raw_affiliation_string":"Meta Reality Labs Research, USA","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029517860","display_name":"Alexander Richard","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Richard","raw_affiliation_strings":["Meta Reality Labs Research,USA","Meta Reality Labs Research, USA"],"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research,USA","institution_ids":["https://openalex.org/I4210128585"]},{"raw_affiliation_string":"Meta Reality Labs Research, USA","institution_ids":["https://openalex.org/I4210128585"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5003177601"],"corresponding_institution_ids":["https://openalex.org/I4210128585"],"apc_list":null,"apc_paid":null,"fwci":11.8272,"has_fulltext":true,"cited_by_count":60,"citation_normalized_percentile":{"value":0.99139958,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.9103724956512451},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8541210889816284},{"id":"https://openalex.org/keywords/adaptive-multi-rate-audio-codec","display_name":"Adaptive Multi-Rate audio codec","score":0.716702938079834},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.5302469730377197},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5241886973381042},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5217781662940979},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5136536955833435},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.508243978023529},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.4928024113178253},{"id":"https://openalex.org/keywords/codec2","display_name":"Codec2","score":0.4762535095214844},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4485899806022644},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.38723695278167725},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.35498031973838806},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.33169472217559814},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.33025285601615906},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.2814819812774658},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20896464586257935},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.17548233270645142},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08338046073913574}],"concepts":[{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.9103724956512451},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8541210889816284},{"id":"https://openalex.org/C177067256","wikidata":"https://www.wikidata.org/wiki/Q4676210","display_name":"Adaptive Multi-Rate audio codec","level":4,"score":0.716702938079834},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.5302469730377197},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5241886973381042},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5217781662940979},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5136536955833435},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.508243978023529},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.4928024113178253},{"id":"https://openalex.org/C75217168","wikidata":"https://www.wikidata.org/wiki/Q1105653","display_name":"Codec2","level":4,"score":0.4762535095214844},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4485899806022644},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.38723695278167725},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.35498031973838806},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.33169472217559814},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.33025285601615906},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.2814819812774658},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20896464586257935},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.17548233270645142},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08338046073913574},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096509","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096509","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2305.16608","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.16608","pdf_url":"https://arxiv.org/pdf/2305.16608","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2305.16608","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.16608","pdf_url":"https://arxiv.org/pdf/2305.16608","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4372270198.pdf","grobid_xml":"https://content.openalex.org/works/W4372270198.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W1481955708","https://openalex.org/W1553834069","https://openalex.org/W1556611829","https://openalex.org/W2002182716","https://openalex.org/W2020883660","https://openalex.org/W2031245628","https://openalex.org/W2037034710","https://openalex.org/W2129913307","https://openalex.org/W2130158387","https://openalex.org/W2141214687","https://openalex.org/W2151626637","https://openalex.org/W2159644785","https://openalex.org/W2165291881","https://openalex.org/W2168013545","https://openalex.org/W2168098717","https://openalex.org/W2309400744","https://openalex.org/W2471520273","https://openalex.org/W2618530766","https://openalex.org/W2752796333","https://openalex.org/W2757519008","https://openalex.org/W2775336875","https://openalex.org/W2935711438","https://openalex.org/W2963182577","https://openalex.org/W2963208781","https://openalex.org/W2963799213","https://openalex.org/W2964073500","https://openalex.org/W2970006822","https://openalex.org/W2972354707","https://openalex.org/W2972519044","https://openalex.org/W3092028330","https://openalex.org/W3124456579","https://openalex.org/W3186609711","https://openalex.org/W3188073270","https://openalex.org/W3198234802","https://openalex.org/W3215615641","https://openalex.org/W4225302959","https://openalex.org/W4284957875","https://openalex.org/W4285044837","https://openalex.org/W4287278740","https://openalex.org/W4312337341","https://openalex.org/W6630442970","https://openalex.org/W6633114069","https://openalex.org/W6680767324","https://openalex.org/W6683021324","https://openalex.org/W6760712927","https://openalex.org/W6767111847","https://openalex.org/W6783867762","https://openalex.org/W6798098866","https://openalex.org/W6936113694"],"related_works":["https://openalex.org/W2370747337","https://openalex.org/W1542588102","https://openalex.org/W2106874932","https://openalex.org/W2375583517","https://openalex.org/W2124860381","https://openalex.org/W10322252","https://openalex.org/W2524123961","https://openalex.org/W2151333624","https://openalex.org/W2496295964","https://openalex.org/W2289505355"],"abstract_inverted_index":{"A":[0],"good":[1],"audio":[2,75,123,149],"codec":[3,76,150],"for":[4,119,125,148],"live":[5],"applications":[6],"such":[7,121],"as":[8,30,32],"telecommunication":[9],"is":[10,22,116,143],"characterized":[11],"by":[12],"three":[13,83],"key":[14],"properties:":[15],"(1)":[16],"compression,":[17],"i.e.":[18,36],"the":[19,26,40,62,134],"bitrate":[20],"that":[21,77],"required":[23],"to":[24,43,47],"transmit":[25],"signal":[27,41],"should":[28],"be":[29,44],"low":[31],"possible;":[33],"(2)":[34],"latency,":[35],"encoding":[37],"and":[38,57,72,101,130],"decoding":[39],"needs":[42],"fast":[45],"enough":[46],"enable":[48],"communication":[49],"without":[50],"or":[51],"with":[52,103],"only":[53,98],"minimal":[54],"noticeable":[55],"delay;":[56],"(3)":[58],"reconstruction":[59],"quality":[60],"of":[61],"signal.":[63],"In":[64],"this":[65],"work,":[66],"we":[67],"propose":[68],"an":[69],"open-source,":[70],"streamable,":[71],"real-time":[73],"neural":[74,122],"achieves":[78],"strong":[79],"performance":[80],"along":[81],"all":[82],"axes:":[84],"it":[85],"can":[86],"reconstruct":[87],"highly":[88],"natural":[89],"sounding":[90],"48":[91],"kHz":[92],"speech":[93],"signals":[94],"while":[95],"operating":[96],"at":[97],"12":[99],"kbps":[100],"running":[102],"less":[104],"than":[105],"6":[106],"ms":[107,109],"(GPU)/10":[108],"(CPU)":[110],"latency.":[111],"An":[112],"efficient":[113],"training":[114],"paradigm":[115],"also":[117],"demonstrated":[118],"developing":[120],"codecs":[124],"real-world":[126],"scenarios.":[127],"Both":[128],"objective":[129],"subjective":[131],"evaluations":[132],"using":[133],"VCTK":[135],"corpus":[136],"are":[137],"provided.":[138],"To":[139],"sum":[140],"up,":[141],"AudioDec":[142],"a":[144],"well-developed":[145],"plug-and-play":[146],"benchmark":[147],"applications.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":27},{"year":2024,"cited_by_count":29},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
