{"id":"https://openalex.org/W4408352802","doi":"https://doi.org/10.1109/icassp49660.2025.10887766","title":"Learning Music Audio Representations With Limited Data","display_name":"Learning Music Audio Representations With Limited Data","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408352802","doi":"https://doi.org/10.1109/icassp49660.2025.10887766"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10887766","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887766","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.06042","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107628528","display_name":"Christos Plachouras","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Christos Plachouras","raw_affiliation_strings":["Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084672392","display_name":"Emmanouil Benetos","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Emmanouil Benetos","raw_affiliation_strings":["Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048509747","display_name":"Johan Pauwels","orcid":"https://orcid.org/0000-0002-5805-7144"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Johan Pauwels","raw_affiliation_strings":["Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5107628528"],"corresponding_institution_ids":["https://openalex.org/I166337079"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04216589,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9768999814987183,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7362900972366333},{"id":"https://openalex.org/keywords/digital-audio","display_name":"Digital audio","score":0.46083977818489075},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.42602238059043884},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.41216734051704407},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.22825241088867188},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.2200162410736084}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7362900972366333},{"id":"https://openalex.org/C87687168","wikidata":"https://www.wikidata.org/wiki/Q173114","display_name":"Digital audio","level":4,"score":0.46083977818489075},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.42602238059043884},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41216734051704407},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.22825241088867188},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.2200162410736084}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icassp49660.2025.10887766","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887766","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2505.06042","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.06042","pdf_url":"https://arxiv.org/pdf/2505.06042","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/103944","is_oa":true,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/103944","pdf_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/103944/2/Benetos%20Learning%20Music%20Audio%20Representations%20With%20Limited%20Data%202025%20Accepted.pdf","source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Proceeding"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.06042","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.06042","pdf_url":"https://arxiv.org/pdf/2505.06042","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1972567154","https://openalex.org/W2509065397","https://openalex.org/W2962845248","https://openalex.org/W2963232038","https://openalex.org/W2964218314","https://openalex.org/W2972570554","https://openalex.org/W3113738772","https://openalex.org/W3114268635","https://openalex.org/W3145450063","https://openalex.org/W3196974791","https://openalex.org/W3214117457","https://openalex.org/W4225300878","https://openalex.org/W4385322239","https://openalex.org/W6637373629","https://openalex.org/W6678969435","https://openalex.org/W6712594816","https://openalex.org/W6714030504","https://openalex.org/W6768465061","https://openalex.org/W6774314701","https://openalex.org/W6778572914","https://openalex.org/W6780333821","https://openalex.org/W6784333009","https://openalex.org/W6791537541","https://openalex.org/W6798064515","https://openalex.org/W6803637910","https://openalex.org/W6854553432","https://openalex.org/W6857718121","https://openalex.org/W6859841695","https://openalex.org/W6949171685"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2065933610"],"abstract_inverted_index":{"Large":[0],"deep-learning":[1],"models":[2,58,83,91,142],"for":[3,43,66],"music,":[4],"including":[5],"those":[6],"focused":[7],"on":[8,103,118],"learning":[9,86],"general-purpose":[10],"music":[11,45,51,80,90,120],"audio":[12,35,81],"representations,":[13],"are":[14,39],"often":[15],"assumed":[16],"to":[17,22,69,109,128,145],"require":[18],"substantial":[19],"training":[20,95],"data":[21,36,104],"achieve":[23],"high":[24],"performance.":[25],"If":[26],"true,":[27],"this":[28,72],"would":[29],"pose":[30],"challenges":[31],"in":[32,60,157],"scenarios":[33,62],"where":[34],"or":[37],"annotations":[38],"scarce,":[40],"such":[41],"as":[42],"underrepresented":[44],"traditions,":[46],"non-popular":[47],"genres,":[48],"and":[49,53,97,100,124,139],"personalized":[50],"creation":[52],"listening.":[54],"Understanding":[55],"how":[56],"these":[57],"behave":[59],"limited-data":[61,85,138],"could":[63],"be":[64],"crucial":[65],"developing":[67],"techniques":[68],"tackle":[70],"them.In":[71],"work,":[73],"we":[74],"investigate":[75],"the":[76,115],"behavior":[77],"of":[78],"several":[79],"representation":[82],"under":[84,133],"regimes.":[87],"We":[88,113,130],"consider":[89],"with":[92],"various":[93,119],"architectures,":[94],"paradigms,":[96],"input":[98],"durations,":[99],"train":[101],"them":[102],"collections":[105],"ranging":[106],"from":[107,137,147],"5":[108],"8,000":[110],"minutes":[111],"long.":[112],"evaluate":[114],"learned":[116,155],"representations":[117,136,156],"information":[121],"retrieval":[122],"tasks":[123],"analyze":[125],"their":[126],"robustness":[127],"noise.":[129],"show":[131],"that,":[132],"certain":[134],"conditions,":[135],"even":[140],"random":[141],"perform":[143],"comparatively":[144],"ones":[146],"large-dataset":[148],"models,":[149],"though":[150],"handcrafted":[151],"features":[152],"outperform":[153],"all":[154],"some":[158],"tasks.":[159]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
