{"id":"https://openalex.org/W7124909398","doi":"https://doi.org/10.1109/lsp.2026.3655351","title":"R3VQ: Redundancy-Reduced Residual Vector Quantization for Low-Bitrate Neural Speech Coding","display_name":"R3VQ: Redundancy-Reduced Residual Vector Quantization for Low-Bitrate Neural Speech Coding","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7124909398","doi":"https://doi.org/10.1109/lsp.2026.3655351"},"language":null,"primary_location":{"id":"doi:10.1109/lsp.2026.3655351","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2026.3655351","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004395418","display_name":"Eunkyun Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I39534123","display_name":"Gwangju Institute of Science and Technology","ror":"https://ror.org/024kbgz78","country_code":"KR","type":"education","lineage":["https://openalex.org/I39534123"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Eunkyun Lee","raw_affiliation_strings":["Gwangju Institute of Science and Technology, Gwangju, South Korea"],"raw_orcid":"https://orcid.org/0009-0000-5576-7792","affiliations":[{"raw_affiliation_string":"Gwangju Institute of Science and Technology, Gwangju, South Korea","institution_ids":["https://openalex.org/I39534123"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067821873","display_name":"Jongwook Chae","orcid":null},"institutions":[{"id":"https://openalex.org/I39534123","display_name":"Gwangju Institute of Science and Technology","ror":"https://ror.org/024kbgz78","country_code":"KR","type":"education","lineage":["https://openalex.org/I39534123"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jongwook Chae","raw_affiliation_strings":["Gwangju Institute of Science and Technology, Gwangju, South Korea"],"raw_orcid":"https://orcid.org/0000-0001-8900-251X","affiliations":[{"raw_affiliation_string":"Gwangju Institute of Science and Technology, Gwangju, South Korea","institution_ids":["https://openalex.org/I39534123"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123409703","display_name":"Sooyoung Park","orcid":null},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sooyoung Park","raw_affiliation_strings":["ETRI, Daejeon, South Korea"],"raw_orcid":"https://orcid.org/0009-0007-4138-3561","affiliations":[{"raw_affiliation_string":"ETRI, Daejeon, South Korea","institution_ids":["https://openalex.org/I142401562"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123404573","display_name":"Jong Won Shin","orcid":null},"institutions":[{"id":"https://openalex.org/I39534123","display_name":"Gwangju Institute of Science and Technology","ror":"https://ror.org/024kbgz78","country_code":"KR","type":"education","lineage":["https://openalex.org/I39534123"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jong Won Shin","raw_affiliation_strings":["Gwangju Institute of Science and Technology, Gwangju, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-8910-0264","affiliations":[{"raw_affiliation_string":"Gwangju Institute of Science and Technology, Gwangju, South Korea","institution_ids":["https://openalex.org/I39534123"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07663935,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"33","issue":null,"first_page":"693","last_page":"697"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.34860000014305115,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.34860000014305115,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.2937999963760376,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.23720000684261322,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.8131999969482422},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.7900999784469604},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.7788000106811523},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.713100016117096},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6326000094413757},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5759999752044678},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.546999990940094},{"id":"https://openalex.org/keywords/code-excited-linear-prediction","display_name":"Code-excited linear prediction","score":0.5159000158309937},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.46779999136924744},{"id":"https://openalex.org/keywords/adaptive-multi-rate-audio-codec","display_name":"Adaptive Multi-Rate audio codec","score":0.44020000100135803}],"concepts":[{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.8131999969482422},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.7900999784469604},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.7788000106811523},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7347999811172485},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7264000177383423},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.713100016117096},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6326000094413757},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5759999752044678},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.546999990940094},{"id":"https://openalex.org/C105964291","wikidata":"https://www.wikidata.org/wiki/Q856184","display_name":"Code-excited linear prediction","level":4,"score":0.5159000158309937},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.46779999136924744},{"id":"https://openalex.org/C177067256","wikidata":"https://www.wikidata.org/wiki/Q4676210","display_name":"Adaptive Multi-Rate audio codec","level":4,"score":0.44020000100135803},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.43639999628067017},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40860000252723694},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40799999237060547},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.39570000767707825},{"id":"https://openalex.org/C40567965","wikidata":"https://www.wikidata.org/wiki/Q1820283","display_name":"Learning vector quantization","level":3,"score":0.38190001249313354},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C75217168","wikidata":"https://www.wikidata.org/wiki/Q1105653","display_name":"Codec2","level":4,"score":0.3617999851703644},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.33739998936653137},{"id":"https://openalex.org/C93372532","wikidata":"https://www.wikidata.org/wiki/Q6552455","display_name":"Linde\u2013Buzo\u2013Gray algorithm","level":3,"score":0.32989999651908875},{"id":"https://openalex.org/C138807605","wikidata":"https://www.wikidata.org/wiki/Q7917845","display_name":"Vector sum excited linear prediction","level":5,"score":0.3215999901294708},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.31709998846054077},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2897000014781952},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2815999984741211},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C80167644","wikidata":"https://www.wikidata.org/wiki/Q463990","display_name":"Harmonic Vector Excitation Coding","level":3,"score":0.25609999895095825},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.25290000438690186},{"id":"https://openalex.org/C2778192920","wikidata":"https://www.wikidata.org/wiki/Q16874989","display_name":"Signal compression","level":4,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2026.3655351","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2026.3655351","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322093","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2064675550","https://openalex.org/W2935711438","https://openalex.org/W2963091184","https://openalex.org/W2972359262","https://openalex.org/W2972519044","https://openalex.org/W3037038648","https://openalex.org/W3161411634","https://openalex.org/W3163243746","https://openalex.org/W3197312245","https://openalex.org/W3198785558","https://openalex.org/W3215615641","https://openalex.org/W4225860133","https://openalex.org/W4296209708","https://openalex.org/W4372270198","https://openalex.org/W4372348514","https://openalex.org/W4375869380","https://openalex.org/W4377231659","https://openalex.org/W4386764386","https://openalex.org/W4392903014","https://openalex.org/W4392903975","https://openalex.org/W4392931975","https://openalex.org/W4402111482","https://openalex.org/W4402115964","https://openalex.org/W4402915905","https://openalex.org/W4404480113","https://openalex.org/W4406417959","https://openalex.org/W4406461725","https://openalex.org/W4409355993","https://openalex.org/W4411232650","https://openalex.org/W4412889674"],"related_works":[],"abstract_inverted_index":{"Neural":[0],"speech":[1,56,114,138,164],"and":[2,25,58,166,173],"audio":[3,12],"codecs":[4,57],"have":[5],"demonstrated":[6],"decent":[7],"quality":[8,165],"of":[9,18,92,100,111,135],"the":[10,38,42,48,72,80,93,98,101,109,112,116,120,133,136,141,147,157],"decoded":[11],"at":[13],"low":[14],"bitrates.":[15],"They":[16],"consist":[17],"three":[19],"parts,":[20],"an":[21],"encoder,":[22],"a":[23,26,84,88,126,152],"decoder,":[24],"quantizer.":[27],"Residual":[28],"vector":[29,34,75],"quantization":[30,35,76],"(RVQ)":[31],"or":[32],"multi-stage":[33],"in":[36,47,53,119,159],"which":[37,78],"residual":[39,74,102],"signal":[40,103],"from":[41,115],"previous":[43,121],"stage":[44,50],"is":[45,51,95],"quantized":[46,106,117],"next":[49],"employed":[52],"many":[54],"neural":[55,85,137],"has":[59],"exhibited":[60],"good":[61],"performance":[62],"while":[63],"providing":[64],"bitrate":[65],"scalability.":[66],"In":[67],"this":[68],"letter,":[69],"we":[70],"propose":[71],"redundancy-reduced":[73],"(R3VQ)":[77],"improves":[79],"RVQ":[81,158],"by":[82,107],"inserting":[83],"network":[86],"called":[87],"refiner.":[89],"The":[90],"role":[91],"refiner":[94],"to":[96,104],"reduce":[97],"power":[99],"be":[105],"enhancing":[108],"estimate":[110],"original":[113],"signals":[118],"stages.":[122],"We":[123],"also":[124],"present":[125],"part-wise":[127],"(PW)":[128],"training":[129,134,154],"scheme":[130,155],"suitable":[131],"for":[132,163],"codec":[139],"with":[140,151,170],"R3VQ.":[142],"Experimental":[143],"results":[144],"showed":[145],"that":[146],"proposed":[148],"R3VQ":[149],"trained":[150],"PW":[153],"outperformed":[156],"both":[160],"objective":[161],"measures":[162],"subjective":[167],"MUltiple":[168],"Stimuli":[169],"Hidden":[171],"Reference":[172],"Anchor":[174],"(MUSHRA)":[175],"test.":[176]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-21T00:00:00"}
