{"id":"https://openalex.org/W4402402508","doi":"https://doi.org/10.1109/icasspw62465.2024.10669909","title":"Leveraging Pre-Trained Autoencoders for Interpretable Prototype Learning of Music Audio","display_name":"Leveraging Pre-Trained Autoencoders for Interpretable Prototype Learning of Music Audio","publication_year":2024,"publication_date":"2024-04-14","ids":{"openalex":"https://openalex.org/W4402402508","doi":"https://doi.org/10.1109/icasspw62465.2024.10669909"},"language":"en","primary_location":{"id":"doi:10.1109/icasspw62465.2024.10669909","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw62465.2024.10669909","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085164793","display_name":"Pablo Alonso-Jim\u00e9nez","orcid":"https://orcid.org/0000-0002-4121-5089"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Pablo Alonso-Jim\u00e9nez","raw_affiliation_strings":["Universitat Pompeu Fabra,Music Technology Group,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Music Technology Group,Spain","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077063899","display_name":"Leonardo Pepino","orcid":"https://orcid.org/0000-0001-5037-3700"},"institutions":[{"id":"https://openalex.org/I151201029","display_name":"Consejo Nacional de Investigaciones Cient\u00edficas y T\u00e9cnicas","ror":"https://ror.org/03cqe8w59","country_code":"AR","type":"government","lineage":["https://openalex.org/I151201029","https://openalex.org/I4210123736","https://openalex.org/I4387155568"]},{"id":"https://openalex.org/I4210144290","display_name":"Institute of Astronomy and Space Physics","ror":"https://ror.org/03rq94151","country_code":"AR","type":"facility","lineage":["https://openalex.org/I151201029","https://openalex.org/I24354313","https://openalex.org/I4210123736","https://openalex.org/I4210144290","https://openalex.org/I4387155568"]}],"countries":["AR"],"is_corresponding":false,"raw_author_name":"Leonardo Pepino","raw_affiliation_strings":["Instituto de Investigaci&#x00F3;n en Ciencias de la Computaci&#x00F3;n (ICC), CONICET-UBA,Argentina"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Instituto de Investigaci&#x00F3;n en Ciencias de la Computaci&#x00F3;n (ICC), CONICET-UBA,Argentina","institution_ids":["https://openalex.org/I151201029","https://openalex.org/I4210144290"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057436494","display_name":"Roser Batlle-Roca","orcid":"https://orcid.org/0000-0003-3591-9378"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Roser Batlle-Roca","raw_affiliation_strings":["Universitat Pompeu Fabra,Music Technology Group,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Music Technology Group,Spain","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075959503","display_name":"Pablo Zinemanas","orcid":"https://orcid.org/0000-0002-9372-730X"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Pablo Zinemanas","raw_affiliation_strings":["Universitat Pompeu Fabra,Music Technology Group,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Music Technology Group,Spain","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004451369","display_name":"Dmitry Bogdanov","orcid":"https://orcid.org/0000-0002-9469-0633"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Dmitry Bogdanov","raw_affiliation_strings":["Universitat Pompeu Fabra,Music Technology Group,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Music Technology Group,Spain","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006479715","display_name":"Xavier Serra","orcid":"https://orcid.org/0000-0003-1395-2345"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Xavier Serra","raw_affiliation_strings":["Universitat Pompeu Fabra,Music Technology Group,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Music Technology Group,Spain","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023777950","display_name":"Mart\u00edn Rocamora","orcid":"https://orcid.org/0000-0003-3183-9717"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Mart\u00edn Rocamora","raw_affiliation_strings":["Universitat Pompeu Fabra,Music Technology Group,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Music Technology Group,Spain","institution_ids":["https://openalex.org/I170486558"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9172,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73713498,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"833","last_page":"837"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9707000255584717,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9557999968528748,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7483863830566406},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5601502060890198},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4932834506034851},{"id":"https://openalex.org/keywords/sound-recording-and-reproduction","display_name":"Sound recording and reproduction","score":0.4348599314689636},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.42660921812057495},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.398185670375824},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.20615404844284058}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7483863830566406},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5601502060890198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4932834506034851},{"id":"https://openalex.org/C128422554","wikidata":"https://www.wikidata.org/wiki/Q20077126","display_name":"Sound recording and reproduction","level":2,"score":0.4348599314689636},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.42660921812057495},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.398185670375824},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.20615404844284058},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icasspw62465.2024.10669909","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw62465.2024.10669909","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},{"id":"pmh:oai:repositori-api.upf.edu:10230/59220","is_oa":false,"landing_page_url":"http://hdl.handle.net/10230/59220","pdf_url":null,"source":{"id":"https://openalex.org/S4306402615","display_name":"Repositori digital de la UPF (Universitat Pompeu Fabra)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I170486558","host_organization_name":"Universitat Pompeu Fabra","host_organization_lineage":["https://openalex.org/I170486558"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/acceptedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1950706092","https://openalex.org/W2059652044","https://openalex.org/W2159561775","https://openalex.org/W2765813195","https://openalex.org/W2945837706","https://openalex.org/W2952395326","https://openalex.org/W2964075883","https://openalex.org/W2964218314","https://openalex.org/W2981731882","https://openalex.org/W3133695494","https://openalex.org/W3146622378","https://openalex.org/W3182900969","https://openalex.org/W3215615641","https://openalex.org/W4286432986","https://openalex.org/W4300957348","https://openalex.org/W4307323391","https://openalex.org/W4380551955","https://openalex.org/W4386794615","https://openalex.org/W4387224530","https://openalex.org/W4389450885","https://openalex.org/W6602337844","https://openalex.org/W6635859925","https://openalex.org/W6712692208","https://openalex.org/W6732646663","https://openalex.org/W6746228475","https://openalex.org/W6764574124","https://openalex.org/W6765778736","https://openalex.org/W6765910423","https://openalex.org/W6769017931","https://openalex.org/W6785932716","https://openalex.org/W6809884996","https://openalex.org/W6844040096","https://openalex.org/W6853515095","https://openalex.org/W6856165946"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"We":[0,88,121],"present":[1],"PECMAE":[2],"an":[3,26],"interpretable":[4],"model":[5,15],"for":[6,65,94],"music":[7,95],"audio":[8],"classification":[9,97],"based":[10,17],"on":[11,18,49,68,92],"prototype":[12],"learning.":[13],"Our":[14],"is":[16],"a":[19,29,79,104,111],"previous":[20],"method,":[21],"APNet,":[22],"which":[23],"jointly":[24],"learns":[25],"autoencoder":[27,135],"and":[28,99,103],"prototypical":[30,118],"network.":[31],"Instead,":[32],"we":[33,76],"propose":[34],"to":[35,43,63],"decouple":[36],"both":[37],"training":[38,71],"processes.":[39],"This":[40],"enables":[41],"us":[42],"leverage":[44],"existing":[45],"self-supervised":[46],"autoencoders":[47],"pre-trained":[48],"much":[50],"larger":[51,105],"data":[52,72],"(EnCodecMAE),":[53],"providing":[54],"representations":[55],"with":[56,117,133],"better":[57],"generalization.":[58],"APNet":[59],"allows":[60,83],"prototypes\u2019":[61],"reconstruction":[62,84],"waveforms":[64],"interpretability":[66],"relying":[67],"the":[69,108,124,130,134,138,144,147],"nearest":[70],"samples.":[73],"In":[74],"contrast,":[75],"explore":[77],"using":[78],"diffusion":[80],"decoder":[81],"that":[82,123],"without":[85],"such":[86],"dependency.":[87],"evaluate":[89],"our":[90],"method":[91],"datasets":[93],"instrument":[96],"(Medley-Solos-DB)":[98],"genre":[100],"recognition":[101],"(GTZAN":[102],"in-house":[106],"dataset),":[107],"latter":[109],"being":[110],"more":[112],"challenging":[113],"task":[114],"not":[115],"addressed":[116],"networks":[119],"before.":[120],"find":[122],"prototype-based":[125],"models":[126],"preserve":[127],"most":[128],"of":[129,140,146],"performance":[131],"achieved":[132],"embeddings,":[136],"while":[137],"sonification":[139],"prototypes":[141],"benefits":[142],"understanding":[143],"behavior":[145],"classifier.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
