{"id":"https://openalex.org/W4412377260","doi":"https://doi.org/10.1145/3726302.3730034","title":"MIDI-Zero: A MIDI-driven Self-Supervised Learning Approach for Music Retrieval","display_name":"MIDI-Zero: A MIDI-driven Self-Supervised Learning Approach for Music Retrieval","publication_year":2025,"publication_date":"2025-07-13","ids":{"openalex":"https://openalex.org/W4412377260","doi":"https://doi.org/10.1145/3726302.3730034"},"language":"en","primary_location":{"id":"doi:10.1145/3726302.3730034","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730034","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730034","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730034","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111746596","display_name":"Y. Su","orcid":null},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhang Su","raw_affiliation_strings":["Beijing University of Chemical Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-3957-5727","affiliations":[{"raw_affiliation_string":"Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100727076","display_name":"Wei Hu","orcid":"https://orcid.org/0000-0001-5320-6086"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Hu","raw_affiliation_strings":["Department of Computer Science and Technology, Beijing University of Chemical Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5320-6086","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108315960","display_name":"Hongfeng Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongfeng Gao","raw_affiliation_strings":["Beijing University of Chemical Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-2953-7223","affiliations":[{"raw_affiliation_string":"Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100690860","display_name":"Fan Zhang","orcid":"https://orcid.org/0000-0002-2058-2373"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Zhang","raw_affiliation_strings":["Beijing University of Chemical Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2058-2373","affiliations":[{"raw_affiliation_string":"Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I75390827"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16940856,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"348","last_page":"357"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/midi","display_name":"MIDI","score":0.9872875213623047},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6944159865379333},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.5308015942573547},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3955310881137848},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3703232407569885}],"concepts":[{"id":"https://openalex.org/C8112396","wikidata":"https://www.wikidata.org/wiki/Q80535","display_name":"MIDI","level":2,"score":0.9872875213623047},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6944159865379333},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.5308015942573547},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3955310881137848},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3703232407569885},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3726302.3730034","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730034","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730034","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3726302.3730034","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730034","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730034","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2454424885","display_name":null,"funder_award_id":"62271034","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5260246792","display_name":null,"funder_award_id":"62271034","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412377260.pdf","grobid_xml":"https://content.openalex.org/works/W4412377260.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W83334756","https://openalex.org/W1966280390","https://openalex.org/W2027518030","https://openalex.org/W2047411082","https://openalex.org/W2063844407","https://openalex.org/W2076694837","https://openalex.org/W2194775991","https://openalex.org/W2906214917","https://openalex.org/W2965178495","https://openalex.org/W2998702515","https://openalex.org/W3015666964","https://openalex.org/W3016629994","https://openalex.org/W3092850823","https://openalex.org/W3158762648","https://openalex.org/W3161928252","https://openalex.org/W3162472933","https://openalex.org/W3205091761","https://openalex.org/W4221161255","https://openalex.org/W4224920338","https://openalex.org/W4225281045","https://openalex.org/W4308086144","https://openalex.org/W4372266723","https://openalex.org/W4384643806","https://openalex.org/W4403792443","https://openalex.org/W6600769105","https://openalex.org/W6603388714"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3133630535","https://openalex.org/W2398084541","https://openalex.org/W4242364395","https://openalex.org/W2386555541","https://openalex.org/W3042962886","https://openalex.org/W2920621887","https://openalex.org/W2187442448"],"abstract_inverted_index":{"Content-based":[0],"Music":[1,46,126],"Retrieval":[2],"(CBMR)":[3],"is":[4,94,113],"a":[5,57,70,173],"fundamental":[6],"task":[7,100],"in":[8,152],"music":[9,50,62,110,119,169,179],"information":[10],"retrieval,":[11],"encompassing":[12],"sub-tasks":[13],"including":[14],"Audio":[15,17,153],"Identification,":[16],"Matching,":[18],"and":[19,38,48,121,167,171,175],"Version":[20],"Identification.":[21],"Traditional":[22],"methods":[23],"typically":[24],"analyze":[25],"audio":[26,166],"signals":[27],"or":[28,108],"spectrograms":[29],"to":[30,34,115],"extract":[31],"features":[32],"related":[33],"rhythm,":[35],"melody,":[36],"harmony,":[37],"timbre.":[39],"However,":[40],"with":[41,135],"the":[42,103,158,163],"rapid":[43],"development":[44],"of":[45],"Transcription":[47,127],"digital":[49],"technologies,":[51],"MIDI":[52,80],"representation":[53],"has":[54],"emerged":[55],"as":[56],"powerful":[58],"alternative":[59],"fo":[60],"r":[61],"analysis.":[63],"In":[64],"this":[65],"paper,":[66],"we":[67],"propose":[68],"MIDI-Zero,":[69],"novel":[71],"self-supervisedlearning":[72],"framework":[73],"for":[74,105,178],"CBMR":[75,148],"that":[76,141],"operates":[77],"entirely":[78],"on":[79,98],"representations.":[81],"Unlike":[82],"existing":[83],"approaches,":[84],"MIDI-Zero":[85,112,142],"requires":[86],"no":[87],"external":[88,109],"training":[89,92],"data;":[90],"all":[91],"data":[93,120],"automatically":[95],"generated":[96],"based":[97],"predefined":[99],"rules,":[101],"eliminating":[102],"need":[104],"labeled":[106],"datasets":[107],"collections.":[111],"designed":[114],"handle":[116],"both":[117],"symbolic":[118,168],"audio-based":[122],"tasks":[123],"by":[124],"leveraging":[125],"models.":[128],"Its":[129],"strong":[130],"robustness":[131],"ensures":[132],"effectiveness":[133],"even":[134],"low-quality":[136],"transcriptions.":[137],"Extensive":[138],"experiments":[139],"demonstrate":[140],"achieves":[143],"competitive":[144],"performance":[145],"across":[146],"various":[147],"sub-tasks,":[149],"particularly":[150],"excelling":[151],"Matching.":[154],"Our":[155],"approach":[156],"simplifies":[157],"feature":[159],"extraction":[160],"process,":[161],"bridges":[162],"gap":[164],"between":[165],"representations,":[170],"offers":[172],"versatile":[174],"scalable":[176],"solution":[177],"retrieval.":[180]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
