{"id":"https://openalex.org/W7125798723","doi":"https://doi.org/10.48550/arxiv.2601.17517","title":"EuleroDec: A Complex-Valued RVQ-VAE for Efficient and Robust Audio Coding","display_name":"EuleroDec: A Complex-Valued RVQ-VAE for Efficient and Robust Audio Coding","publication_year":2026,"publication_date":"2026-01-24","ids":{"openalex":"https://openalex.org/W7125798723","doi":"https://doi.org/10.48550/arxiv.2601.17517"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.17517","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124036675","display_name":"Luca Cerovaz","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cerovaz, Luca","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123898128","display_name":"Michele Mancusi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mancusi, Michele","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123966378","display_name":"Emanuele Rodol\u00e0","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rodol\u00e0, Emanuele","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5124036675"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.19760000705718994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.19760000705718994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.1729000061750412,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.14409999549388885,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.7148000001907349},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.4855000078678131},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4514999985694885},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.43309998512268066},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.40380001068115234},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4000999927520752},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.37439998984336853},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.3720000088214874},{"id":"https://openalex.org/keywords/audio-signal-flow","display_name":"Audio signal flow","score":0.3677999973297119},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.3675999939441681}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7989000082015991},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.7148000001907349},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5945000052452087},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.4855000078678131},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4514999985694885},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.43309998512268066},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.40380001068115234},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4000999927520752},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.37439998984336853},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.3720000088214874},{"id":"https://openalex.org/C167940747","wikidata":"https://www.wikidata.org/wiki/Q63727227","display_name":"Audio signal flow","level":5,"score":0.3677999973297119},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.3675999939441681},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.3666999936103821},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3626999855041504},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.36039999127388},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3513000011444092},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.3481000065803528},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.3249000012874603},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C98526533","wikidata":"https://www.wikidata.org/wiki/Q1691938","display_name":"Sub-band coding","level":3,"score":0.28220000863075256},{"id":"https://openalex.org/C87687168","wikidata":"https://www.wikidata.org/wiki/Q173114","display_name":"Digital audio","level":4,"score":0.2777000069618225},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2759000062942505},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2721000015735626},{"id":"https://openalex.org/C177067256","wikidata":"https://www.wikidata.org/wiki/Q4676210","display_name":"Adaptive Multi-Rate audio codec","level":4,"score":0.2621000111103058},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.2578999996185303},{"id":"https://openalex.org/C534633266","wikidata":"https://www.wikidata.org/wiki/Q4774317","display_name":"Anti-aliasing","level":5,"score":0.25619998574256897},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.25209999084472656},{"id":"https://openalex.org/C128422554","wikidata":"https://www.wikidata.org/wiki/Q20077126","display_name":"Sound recording and reproduction","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.17517","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2601.17517","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.17517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.17517","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.7296066880226135,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Audio":[0],"codecs":[1,43],"power":[2,82],"discrete":[3],"music":[4,7],"generative":[5],"modelling,":[6],"streaming":[8],"and":[9,73,107,111,126],"immersive":[10],"media":[11],"by":[12,148],"shrinking":[13],"PCM":[14],"audio":[15,85,96],"to":[16,63,76,132],"bandwidth-friendly":[17],"bit-rates.":[18],"Recent":[19],"works":[20],"have":[21],"gravitated":[22],"towards":[23],"processing":[24],"in":[25],"the":[26,61,68,79,84,103],"spectral":[27],"domain;":[28],"however,":[29],"spectrogram-domains":[30],"typically":[31],"struggle":[32],"with":[33],"phase":[34,46],"modeling":[35],"which":[36],"is":[37,153],"naturally":[38],"complex-valued.":[39],"Most":[40],"frequency-domain":[41],"neural":[42],"either":[44],"disregard":[45],"information":[47],"or":[48,116,120],"encode":[49],"it":[50],"as":[51],"two":[52],"separate":[53],"real-valued":[54],"channels,":[55],"limiting":[56],"spatial":[57],"fidelity.":[58],"This":[59],"entails":[60],"need":[62],"introduce":[64,91],"adversarial":[65,109],"discriminators":[66,110],"at":[67],"expense":[69],"of":[70,83,139,141,151],"convergence":[71],"speed":[72],"training":[74,146],"stability":[75],"compensate":[77],"for":[78,137],"inadequate":[80],"representation":[81],"signal.":[86],"In":[87],"this":[88],"work":[89],"we":[90,118],"an":[92,149],"end-to-end":[93],"complex-valued":[94],"RVQ-VAE":[95],"codec":[97],"that":[98,135],"preserves":[99],"magnitude-phase":[100],"coupling":[101],"across":[102],"entire":[104],"analysis-quantization-synthesis":[105],"pipeline":[106],"removes":[108],"diffusion":[112,117],"post-filters.":[113],"Without":[114],"GANs":[115],"match":[119],"surpass":[121],"much":[122],"longer-trained":[123],"baselines":[124,134],"in-domain":[125],"reach":[127],"SOTA":[128],"out-of-domain":[129],"performance.":[130],"Compared":[131],"standard":[133],"train":[136],"hundreds":[138],"thousands":[140],"steps,":[142],"our":[143],"model":[144],"reducing":[145],"budget":[147],"order":[150],"magnitude":[152],"markedly":[154],"more":[155],"compute-efficient":[156],"while":[157],"preserving":[158],"high":[159],"perceptual":[160],"quality.":[161]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-01-28T00:00:00"}
