{"id":"https://openalex.org/W4407665823","doi":"https://doi.org/10.3390/a18020108","title":"Beyond Spectrograms: Rethinking Audio Classification from EnCodec\u2019s Latent Space","display_name":"Beyond Spectrograms: Rethinking Audio Classification from EnCodec\u2019s Latent Space","publication_year":2025,"publication_date":"2025-02-16","ids":{"openalex":"https://openalex.org/W4407665823","doi":"https://doi.org/10.3390/a18020108"},"language":"en","primary_location":{"id":"doi:10.3390/a18020108","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18020108","pdf_url":"https://www.mdpi.com/1999-4893/18/2/108/pdf?version=1739782290","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/18/2/108/pdf?version=1739782290","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083496615","display_name":"Jorge Perianez-Pascual","orcid":"https://orcid.org/0000-0001-8708-4734"},"institutions":[{"id":"https://openalex.org/I80606768","display_name":"Universidad de Extremadura","ror":"https://ror.org/0174shg90","country_code":"ES","type":"education","lineage":["https://openalex.org/I80606768"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jorge Perianez-Pascual","raw_affiliation_strings":["Instituto de Investigaci\u00f3n en Tecnolog\u00edas Inform\u00e1ticas Aplicadas (INTIA), Universidad de Extremadura, Av. Universidad s/n, 10003 C\u00e1ceres, Spain"],"affiliations":[{"raw_affiliation_string":"Instituto de Investigaci\u00f3n en Tecnolog\u00edas Inform\u00e1ticas Aplicadas (INTIA), Universidad de Extremadura, Av. Universidad s/n, 10003 C\u00e1ceres, Spain","institution_ids":["https://openalex.org/I80606768"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088907689","display_name":"Juan D. Guti\u00e9rrez","orcid":"https://orcid.org/0000-0002-1024-6202"},"institutions":[{"id":"https://openalex.org/I200284239","display_name":"Universidade de Santiago de Compostela","ror":"https://ror.org/030eybx10","country_code":"ES","type":"education","lineage":["https://openalex.org/I200284239"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Juan D. Guti\u00e9rrez","raw_affiliation_strings":["Department of Electronics and Computer Science, Universidad de Santiago de Compostela, R\u00faa Benigno Ledo, 27002 Lugo, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Electronics and Computer Science, Universidad de Santiago de Compostela, R\u00faa Benigno Ledo, 27002 Lugo, Spain","institution_ids":["https://openalex.org/I200284239"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093790254","display_name":"Laura Escobar-Encinas","orcid":null},"institutions":[{"id":"https://openalex.org/I80606768","display_name":"Universidad de Extremadura","ror":"https://ror.org/0174shg90","country_code":"ES","type":"education","lineage":["https://openalex.org/I80606768"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Laura Escobar-Encinas","raw_affiliation_strings":["Department of Computers and Telematics Systems Engineering, Universidad de Extremadura, Escuela Polit\u00e9cnica, 10003 C\u00e1ceres, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Computers and Telematics Systems Engineering, Universidad de Extremadura, Escuela Polit\u00e9cnica, 10003 C\u00e1ceres, Spain","institution_ids":["https://openalex.org/I80606768"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009435283","display_name":"\u00c1lvaro Rubio\u2010Largo","orcid":"https://orcid.org/0000-0003-2999-4304"},"institutions":[{"id":"https://openalex.org/I80606768","display_name":"Universidad de Extremadura","ror":"https://ror.org/0174shg90","country_code":"ES","type":"education","lineage":["https://openalex.org/I80606768"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"\u00c1lvaro Rubio-Largo","raw_affiliation_strings":["Department of Computers and Telematics Systems Engineering, Universidad de Extremadura, Escuela Polit\u00e9cnica, 10003 C\u00e1ceres, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Computers and Telematics Systems Engineering, Universidad de Extremadura, Escuela Polit\u00e9cnica, 10003 C\u00e1ceres, Spain","institution_ids":["https://openalex.org/I80606768"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000819463","display_name":"Roberto Rodr\u00edguez-Echeverr\u00eda","orcid":"https://orcid.org/0000-0002-6545-0913"},"institutions":[{"id":"https://openalex.org/I80606768","display_name":"Universidad de Extremadura","ror":"https://ror.org/0174shg90","country_code":"ES","type":"education","lineage":["https://openalex.org/I80606768"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Roberto Rodriguez-Echeverria","raw_affiliation_strings":["Instituto de Investigaci\u00f3n en Tecnolog\u00edas Inform\u00e1ticas Aplicadas (INTIA), Universidad de Extremadura, Av. Universidad s/n, 10003 C\u00e1ceres, Spain"],"affiliations":[{"raw_affiliation_string":"Instituto de Investigaci\u00f3n en Tecnolog\u00edas Inform\u00e1ticas Aplicadas (INTIA), Universidad de Extremadura, Av. Universidad s/n, 10003 C\u00e1ceres, Spain","institution_ids":["https://openalex.org/I80606768"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5009435283"],"corresponding_institution_ids":["https://openalex.org/I80606768"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":2.8148,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.879144,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"18","issue":"2","first_page":"108","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.982200026512146,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.982200026512146,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8515310287475586},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.6528107523918152},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.48015421628952026},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3786145746707916}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8515310287475586},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.6528107523918152},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48015421628952026},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3786145746707916},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/a18020108","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18020108","pdf_url":"https://www.mdpi.com/1999-4893/18/2/108/pdf?version=1739782290","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:dehesa.unex.es:10662/25510","is_oa":false,"landing_page_url":"http://hdl.handle.net/10662/25510","pdf_url":null,"source":{"id":"https://openalex.org/S4306401011","display_name":"Institutional Repository University of Extremadura (University of Extremadura)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I80606768","host_organization_name":"Universidad de Extremadura","host_organization_lineage":["https://openalex.org/I80606768"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publishedVersion"},{"id":"pmh:oai:doaj.org/article:2442bf4b95a048718adc1bc3a725d9a0","is_oa":true,"landing_page_url":"https://doaj.org/article/2442bf4b95a048718adc1bc3a725d9a0","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 18, Iss 2, p 108 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/a18020108","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18020108","pdf_url":"https://www.mdpi.com/1999-4893/18/2/108/pdf?version=1739782290","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3463879373","display_name":null,"funder_award_id":"NextGenerationEU/PRTR","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G3480869486","display_name":null,"funder_award_id":"13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G5080475149","display_name":null,"funder_award_id":"10.13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G661330594","display_name":null,"funder_award_id":"00110","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G6685425346","display_name":null,"funder_award_id":"0011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7084143925","display_name":null,"funder_award_id":"AEI/10","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7535663061","display_name":null,"funder_award_id":"AEI/10.","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G8051717526","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8260616629","display_name":null,"funder_award_id":"011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335598","display_name":"Agencia Estatal de Investigaci\u00f3n","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4407665823.pdf","grobid_xml":"https://content.openalex.org/works/W4407665823.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W2052666245","https://openalex.org/W2117497587","https://openalex.org/W2133824856","https://openalex.org/W2137637927","https://openalex.org/W2191779130","https://openalex.org/W2192412620","https://openalex.org/W3094550259","https://openalex.org/W3095095816","https://openalex.org/W3205475937","https://openalex.org/W3215615641","https://openalex.org/W4372270198"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2088854863","https://openalex.org/W4402568167","https://openalex.org/W3179495260","https://openalex.org/W1976719989"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,44,77],"novel":[4,142],"approach":[5,87],"to":[6,63,91],"audio":[7,18,30,120],"classification":[8,35,56,121],"leveraging":[9],"the":[10,23,38,73,108,125],"latent":[11,25,112],"representation":[12,27,79,113],"generated":[13],"by":[14],"Meta\u2019s":[15],"EnCodec":[16],"neural":[17,47],"codec.":[19],"We":[20,42,123],"hypothesize":[21],"that":[22,85],"compressed":[24],"space":[26],"captures":[28],"essential":[29],"features":[31],"more":[32],"suitable":[33],"for":[34,49,114],"tasks":[36],"than":[37],"traditional":[39,135],"spectrogram-based":[40,78,136],"approaches.":[41],"train":[43],"vanilla":[45],"convolutional":[46],"network":[48,75],"music":[50],"genre,":[51],"speech/music,":[52],"and":[53,99,117,130],"environmental":[54],"sound":[55],"using":[57,76],"EnCodec\u2019s":[58,111,128],"encoder":[59],"output":[60,129],"as":[61,80],"input":[62],"validate":[64],"this.":[65],"Then,":[66],"we":[67],"compare":[68,131],"its":[69,132],"performance":[70,133],"training":[71],"with":[72],"same":[74],"input.":[81],"Our":[82],"experiments":[83],"demonstrate":[84],"this":[86,141],"achieves":[88],"comparable":[89],"accuracy":[90],"state-of-the-art":[92],"methods":[93],"while":[94],"exhibiting":[95],"significantly":[96],"faster":[97],"convergence":[98],"reduced":[100],"computational":[101],"load":[102],"during":[103],"training.":[104],"These":[105],"findings":[106],"suggest":[107],"potential":[109],"of":[110,127],"efficient,":[115],"faster,":[116],"less":[118],"expensive":[119],"applications.":[122],"analyze":[124],"characteristics":[126],"against":[134],"approaches,":[137],"providing":[138],"insights":[139],"into":[140],"approach\u2019s":[143],"advantages.":[144]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
