{"id":"https://openalex.org/W4392931767","doi":"https://doi.org/10.1109/icassp48485.2024.10447473","title":"An Experimental Comparison of Multi-View Self-Supervised Methods for Music Tagging","display_name":"An Experimental Comparison of Multi-View Self-Supervised Methods for Music Tagging","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392931767","doi":"https://doi.org/10.1109/icassp48485.2024.10447473"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447473","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447473","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062693009","display_name":"Gabriel Meseguer-Brocal","orcid":"https://orcid.org/0000-0002-5232-8628"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gabriel Meseguer-Brocal","raw_affiliation_strings":["Deezer Research,Paris,France","Deezer Research, Paris, France"],"affiliations":[{"raw_affiliation_string":"Deezer Research,Paris,France","institution_ids":[]},{"raw_affiliation_string":"Deezer Research, Paris, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016930163","display_name":"Dorian Desblancs","orcid":"https://orcid.org/0000-0003-1727-4246"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dorian Desblancs","raw_affiliation_strings":["Deezer Research,Paris,France","Deezer Research, Paris, France"],"affiliations":[{"raw_affiliation_string":"Deezer Research,Paris,France","institution_ids":[]},{"raw_affiliation_string":"Deezer Research, Paris, France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004879177","display_name":"Romain Hennequin","orcid":"https://orcid.org/0000-0001-8158-5562"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Romain Hennequin","raw_affiliation_strings":["Deezer Research,Paris,France","Deezer Research, Paris, France"],"affiliations":[{"raw_affiliation_string":"Deezer Research,Paris,France","institution_ids":[]},{"raw_affiliation_string":"Deezer Research, Paris, France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5062693009"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2502,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87943035,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1141","last_page":"1145"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8104597330093384},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6095668077468872},{"id":"https://openalex.org/keywords/pretext","display_name":"Pretext","score":0.6091465950012207},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5612937211990356},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5545764565467834},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.4513024091720581},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.44624513387680054},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.41224777698516846},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.37780070304870605},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35345786809921265},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.09896326065063477}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8104597330093384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6095668077468872},{"id":"https://openalex.org/C2779627259","wikidata":"https://www.wikidata.org/wiki/Q779763","display_name":"Pretext","level":3,"score":0.6091465950012207},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5612937211990356},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5545764565467834},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.4513024091720581},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.44624513387680054},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.41224777698516846},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37780070304870605},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35345786809921265},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.09896326065063477},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447473","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447473","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W2127870748","https://openalex.org/W2194775991","https://openalex.org/W2555897561","https://openalex.org/W2619697695","https://openalex.org/W2896457183","https://openalex.org/W2950060770","https://openalex.org/W2954540134","https://openalex.org/W3005680577","https://openalex.org/W3015949486","https://openalex.org/W3035060554","https://openalex.org/W3036224891","https://openalex.org/W3036601975","https://openalex.org/W3134652006","https://openalex.org/W3139211892","https://openalex.org/W3159481202","https://openalex.org/W3162391496","https://openalex.org/W3166396011","https://openalex.org/W3176276772","https://openalex.org/W3201143670","https://openalex.org/W3205065023","https://openalex.org/W4221167446","https://openalex.org/W4225713393","https://openalex.org/W4226219311","https://openalex.org/W4280498166","https://openalex.org/W4286695273","https://openalex.org/W4292779060","https://openalex.org/W4293342670","https://openalex.org/W4304699761","https://openalex.org/W4308860702","https://openalex.org/W4311618684","https://openalex.org/W4367000428","https://openalex.org/W4379251869","https://openalex.org/W6633499030","https://openalex.org/W6678969435","https://openalex.org/W6730323794","https://openalex.org/W6763945542","https://openalex.org/W6765696844","https://openalex.org/W6774314701","https://openalex.org/W6778883912","https://openalex.org/W6779326418","https://openalex.org/W6779997284","https://openalex.org/W6780218876","https://openalex.org/W6791353385","https://openalex.org/W6791537541","https://openalex.org/W6791742336","https://openalex.org/W6795754764","https://openalex.org/W6802387851","https://openalex.org/W6809715509","https://openalex.org/W6843026064","https://openalex.org/W6845228019","https://openalex.org/W6846470732","https://openalex.org/W6851949647","https://openalex.org/W6853393314"],"related_works":["https://openalex.org/W161456234","https://openalex.org/W3123043866","https://openalex.org/W2765162471","https://openalex.org/W2367130511","https://openalex.org/W4235007455","https://openalex.org/W2354300066","https://openalex.org/W2996988663","https://openalex.org/W2276802262","https://openalex.org/W2089741817","https://openalex.org/W2358993285"],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1,12,101,162],"has":[2],"emerged":[3],"as":[4,74],"a":[5,133,139,179],"powerful":[6],"way":[7],"to":[8,79,116,170],"pre-train":[9],"generalizable":[10],"machine":[11],"models":[13,41],"on":[14,44,99,138],"large":[15],"amounts":[16],"of":[17,51,68,93,112,124,142,144,152],"unlabeled":[18],"data.":[19],"It":[20],"is":[21,32,72],"particularly":[22],"compelling":[23],"in":[24,157,165,178],"the":[25,38,48,69,77,81,91,110,122],"music":[26,117,129],"domain,":[27],"where":[28],"obtaining":[29],"labeled":[30],"data":[31],"time-consuming,":[33],"error-prone,":[34],"and":[35,54,120],"ambiguous.":[36],"During":[37],"self-supervised":[39,126,172],"process,":[40],"are":[42],"trained":[43,137],"pretext":[45,70,113],"tasks,":[46],"with":[47,84],"primary":[49],"objective":[50],"acquiring":[52],"robust":[53],"informative":[55],"features":[56],"that":[57],"can":[58],"later":[59],"be":[60],"fine-tuned":[61],"for":[62,87,128],"specific":[63],"downstream":[64,159,167,181],"tasks.":[65],"The":[66],"choice":[67],"task":[71],"critical":[73],"it":[75],"guides":[76],"model":[78,136],"shape":[80],"feature":[82],"space":[83],"meaningful":[85],"constraints":[86],"information":[88],"encoding.":[89],"In":[90,105],"context":[92],"music,":[94],"most":[95,151],"works":[96],"have":[97],"relied":[98],"contrastive":[100,161],"or":[102],"masking":[103],"techniques.":[104],"this":[106],"study,":[107],"we":[108],"expand":[109],"scope":[111],"tasks":[114],"applied":[115],"by":[118],"investigating":[119],"comparing":[121],"performance":[123,168],"new":[125],"methods":[127,155],"tagging.":[130],"We":[131],"open-source":[132],"simple":[134],"ResNet":[135],"diverse":[140],"catalog":[141],"millions":[143],"tracks.":[145],"Our":[146],"results":[147,164],"demonstrate":[148],"that,":[149],"although":[150],"these":[153],"pre-training":[154,173],"result":[156],"similar":[158],"results,":[160],"consistently":[163],"better":[166],"compared":[169],"other":[171],"methods.":[174],"This":[175],"holds":[176],"true":[177],"limited-data":[180],"context.":[182]},"counts_by_year":[{"year":2025,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
