{"id":"https://openalex.org/W4403511255","doi":"https://doi.org/10.1109/jstsp.2024.3482972","title":"DRED: Deep REDundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder","display_name":"DRED: Deep REDundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder","publication_year":2024,"publication_date":"2024-10-17","ids":{"openalex":"https://openalex.org/W4403511255","doi":"https://doi.org/10.1109/jstsp.2024.3482972"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2024.3482972","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2024.3482972","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021269794","display_name":"Jean-Marc Valin","orcid":"https://orcid.org/0000-0002-9883-6927"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jean-Marc Valin","raw_affiliation_strings":["Xiph.Org Foundation, Jaffrey, NH, USA","Xiph.Org Foundation"],"affiliations":[{"raw_affiliation_string":"Xiph.Org Foundation, Jaffrey, NH, USA","institution_ids":[]},{"raw_affiliation_string":"Xiph.Org Foundation","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106370760","display_name":"Jan B\u00fcthe","orcid":"https://orcid.org/0009-0003-9684-1567"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jan B\u00fcthe","raw_affiliation_strings":["Xiph.Org Foundation, Jaffrey, NH, USA","Xiph.Org Foundation"],"affiliations":[{"raw_affiliation_string":"Xiph.Org Foundation, Jaffrey, NH, USA","institution_ids":[]},{"raw_affiliation_string":"Xiph.Org Foundation","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110745444","display_name":"Ahmed Mustafa","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmed Mustafa","raw_affiliation_strings":["Amazon Web Services, Palo Alto, CA, USA","Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087078857","display_name":"Michael Klingbeil","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Klingbeil","raw_affiliation_strings":["Amazon Web Services, Palo Alto, CA, USA","Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5021269794"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7895,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73832241,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"18","issue":"8","first_page":"1441","last_page":"1447"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9641000032424927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.839626133441925},{"id":"https://openalex.org/keywords/rate\u2013distortion-theory","display_name":"Rate\u2013distortion theory","score":0.6577582955360413},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.6455979347229004},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6405636072158813},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6030911207199097},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5274872183799744},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.45929545164108276},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.43909239768981934},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43868446350097656},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3941877484321594},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3819448947906494},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21006646752357483},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.19866669178009033},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.16258132457733154},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.14035707712173462},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.05879899859428406}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.839626133441925},{"id":"https://openalex.org/C64185310","wikidata":"https://www.wikidata.org/wiki/Q843483","display_name":"Rate\u2013distortion theory","level":3,"score":0.6577582955360413},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.6455979347229004},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6405636072158813},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6030911207199097},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5274872183799744},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.45929545164108276},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.43909239768981934},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43868446350097656},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3941877484321594},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3819448947906494},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21006646752357483},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.19866669178009033},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.16258132457733154},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.14035707712173462},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.05879899859428406},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jstsp.2024.3482972","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2024.3482972","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2063678710","https://openalex.org/W2151912295","https://openalex.org/W2406280060","https://openalex.org/W2750167318","https://openalex.org/W2752796333","https://openalex.org/W2775336875","https://openalex.org/W2895654193","https://openalex.org/W2963091184","https://openalex.org/W2963208781","https://openalex.org/W2963446712","https://openalex.org/W2964199361","https://openalex.org/W2972519044","https://openalex.org/W3025844872","https://openalex.org/W3092791109","https://openalex.org/W3095497211","https://openalex.org/W3160576174","https://openalex.org/W3198020407","https://openalex.org/W3215615641","https://openalex.org/W4296069296","https://openalex.org/W4297841420","https://openalex.org/W4372190822","https://openalex.org/W4372348514","https://openalex.org/W4375868823","https://openalex.org/W4392931427","https://openalex.org/W4401452051","https://openalex.org/W6639363673","https://openalex.org/W6640963894","https://openalex.org/W6690026940","https://openalex.org/W6777664437","https://openalex.org/W6777781272","https://openalex.org/W6778625279","https://openalex.org/W6778672582","https://openalex.org/W6779192484","https://openalex.org/W6793364766","https://openalex.org/W6802838302"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W4386815338","https://openalex.org/W2145836866","https://openalex.org/W2002925467"],"abstract_inverted_index":{"Despite":[0],"recent":[1],"advancements":[2],"in":[3,18,26,42,126],"packet":[4,12,56],"loss":[5,13,45],"concealment":[6],"(PLC)":[7],"using":[8,104],"deep":[9,75],"learning":[10],"techniques,":[11],"remains":[14],"a":[15,60,65,74],"significant":[16],"challenge":[17],"real-time":[19],"speech":[20,76],"communication.":[21],"Redundancy":[22],"has":[23],"been":[24],"used":[25],"the":[27,31,43,80,114,127],"past":[28],"to":[29,72,101],"recover":[30],"missing":[32],"information":[33],"during":[34],"losses.":[35],"However,":[36],"conventional":[37],"redundancy":[38,87,103],"techniques":[39],"are":[40,51],"limited":[41],"maximum":[44],"duration":[46],"they":[47],"can":[48,98],"cover":[49],"and":[50],"often":[52],"unsuitable":[53],"for":[54,79],"burst":[55],"loss.":[57],"We":[58,119],"propose":[59],"new":[61],"approach":[62],"based":[63],"on":[64],"rate-distortion-optimized":[66],"variational":[67],"autoencoder":[68],"(RDO-VAE),":[69],"allowing":[70],"us":[71],"optimize":[73],"compression":[77],"algorithm":[78,97],"task":[81],"of":[82,86,129],"encoding":[83],"large":[84],"amounts":[85],"at":[88],"very":[89],"low":[90],"bitrate.":[91],"The":[92],"proposed":[93],"Deep":[94],"REDundancy":[95],"(DRED)":[96],"transmit":[99],"up":[100],"50x":[102],"less":[105],"than":[106],"32":[107],"kb/s.":[108],"Results":[109],"show":[110],"that":[111],"DRED":[112],"outperforms":[113],"existing":[115],"Opus":[116],"codec":[117],"redundancy.":[118],"also":[120],"demonstrate":[121],"its":[122],"benefits":[123],"when":[124],"operating":[125],"context":[128],"WebRTC.":[130]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
