{"id":"https://openalex.org/W6893306190","doi":"https://doi.org/10.5281/zenodo.14877401","title":"Semi-Supervised Contrastive Learning of Musical Representations","display_name":"Semi-Supervised Contrastive Learning of Musical Representations","publication_year":2024,"publication_date":"2024-11-10","ids":{"openalex":"https://openalex.org/W6893306190","doi":"https://doi.org/10.5281/zenodo.14877401"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.14877401","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877401","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.14877401","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Julien PM Guinot","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Julien PM Guinot","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Elio Quinton","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elio Quinton","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"George Fazekas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"George Fazekas","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37680694,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9613000154495239,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9613000154495239,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.009499999694526196,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.007199999876320362,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.6582000255584717},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6044999957084656},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.4683000147342682},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4413999915122986},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4171000123023987},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.40959998965263367},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.3953999876976013}],"concepts":[{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.6582000255584717},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6291000247001648},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6044999957084656},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5151000022888184},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5001000165939331},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.4683000147342682},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4413999915122986},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4171000123023987},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.40959998965263367},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.3953999876976013},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.39419999718666077},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C2777629044","wikidata":"https://www.wikidata.org/wiki/Q614959","display_name":"Contrastive analysis","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3154999911785126},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2590000033378601}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.14877401","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877401","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.14877401","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877401","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.8113451600074768}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"the":[1,10,63,121,126,134],"success":[2],"of":[3,13,66,107,114,128,187],"contrastive":[4,14,48,64,78,85],"learning":[5,49,65,79,155],"in":[6,62,87,191],"Music":[7],"Information":[8],"Retrieval,":[9],"inherent":[11],"ambiguity":[12],"self-supervision":[15],"presents":[16],"a":[17,32,51,88,105],"challenge.":[18],"Relying":[19],"solely":[20],"on":[21,104,157,178],"augmentation":[22],"chains":[23],"and":[24,83,99,140,169],"self-supervised":[25,77,84,182],"positive":[26],"sampling":[27],"strategies":[28],"can":[29],"lead":[30],"to":[31,101],"pretraining":[33],"objective":[34],"that":[35,131],"does":[36],"not":[37,161],"capture":[38],"key":[39,170],"musical":[40,67,137],"information":[41],"for":[42,54],"downstream":[43,97,108,148],"tasks.":[44],"We":[45,151],"introduce":[46],"semi-supervised":[47],"(SemiSupCon),":[50],"simple":[52],"method":[53],"leveraging":[55],"musically":[56,72,158],"informed":[57],"labeled":[58,115,129],"data":[59,130],"(supervision":[60],"signals)":[61],"representations.":[68],"Our":[69,117],"approach":[70,118,174],"introduces":[71],"relevant":[73],"supervision":[74],"signals":[75],"into":[76],"by":[80],"combining":[81],"supervised":[82],"objectives":[86],"simpler":[89],"framework":[90,95],"than":[91],"previous":[92],"approaches.":[93],"This":[94],"improves":[96,142],"performance":[98,144,149,156,176],"robustness":[100],"audio":[102],"corruptions":[103],"range":[106],"MIR":[109],"tasks":[110,164],"with":[111,136,145,184],"moderate":[112],"amounts":[113],"data.":[116],"enables":[119],"shaping":[120],"learned":[122],"similarity":[123],"metric":[124],"through":[125],"choice":[127],"(1)":[132],"infuses":[133],"representations":[135],"domain":[138],"knowledge":[139],"(2)":[141],"out-of-domain":[143],"minimal":[146],"general":[147],"loss.":[150],"show":[152],"strong":[153],"transfer":[154],"related":[159],"yet":[160],"trivially":[162],"similar":[163],"-":[165],"such":[166],"as":[167],"pitch":[168],"estimation.":[171],"Additionally,":[172],"our":[173],"shows":[175],"improvement":[177],"automatic":[179],"tagging":[180],"over":[181],"approaches":[183],"only":[185],"5%":[186],"available":[188],"labels":[189],"included":[190],"pretraining.":[192]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
