{"id":"https://openalex.org/W6912185530","doi":"https://doi.org/10.5281/zenodo.14877459","title":"MuChoMusic: Evaluating Music Understanding in Multimodal Audio-Language Models","display_name":"MuChoMusic: Evaluating Music Understanding in Multimodal Audio-Language Models","publication_year":2024,"publication_date":"2024-11-10","ids":{"openalex":"https://openalex.org/W6912185530","doi":"https://doi.org/10.5281/zenodo.14877459"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.14877459","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877459","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.14877459","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Benno Weck","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Benno Weck","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ilaria Manco","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ilaria Manco","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Emmanouil Benetos","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Emmanouil Benetos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Elio Quinton","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elio Quinton","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"George Fazekas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"George Fazekas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Dmitry Bogdanov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dmitry Bogdanov","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39294319,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9157000184059143,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9157000184059143,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.009100000374019146,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10788","display_name":"Neuroscience and Music Perception","score":0.008799999952316284,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.7788000106811523},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5648999810218811},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.48559999465942383},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.4747999906539917},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.4740999937057495},{"id":"https://openalex.org/keywords/cover","display_name":"Cover (algebra)","score":0.38580000400543213}],"concepts":[{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.7788000106811523},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7494000196456909},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5648999810218811},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.48559999465942383},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.4747999906539917},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.4740999937057495},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.38580000400543213},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3634999990463257},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36149999499320984},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36079999804496765},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.35740000009536743},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3165000081062317},{"id":"https://openalex.org/C143857728","wikidata":"https://www.wikidata.org/wiki/Q193544","display_name":"Music theory","level":3,"score":0.2655999958515167},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.26429998874664307},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2596000134944916}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.14877459","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877459","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.14877459","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877459","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"score":0.7062035799026489,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"models":[1,39,93,165],"that":[2,140],"jointly":[3],"process":[4],"audio":[5,12,36],"and":[6,14,30,60,118,134,145,150,166,186],"language":[7,92,175],"hold":[8],"great":[9],"promise":[10],"in":[11,19,90,126],"understanding":[13,49,89],"are":[15,129,188],"increasingly":[16],"being":[17],"adopted":[18],"the":[20,41,127,154,159,174],"music":[21,48,88,109,116],"domain.":[22],"By":[23],"allowing":[24],"users":[25],"to":[26,43,65,70,131,148,178],"query":[27],"via":[28,51],"text":[29],"obtain":[31],"information":[32],"about":[33],"a":[34,45,84,120,179],"given":[35],"input,":[37],"these":[38],"have":[40],"potential":[42],"enable":[44],"variety":[46,122],"of":[47,123],"tasks":[50],"language-based":[52],"interfaces.":[53],"However,":[54],"their":[55,68,146],"evaluation":[56],"poses":[57],"considerable":[58],"challenges,":[59],"it":[61],"remains":[62],"unclear":[63],"how":[64],"effectively":[66],"assess":[67,132],"ability":[69],"correctly":[71],"interpret":[72],"music-related":[73],"inputs":[74],"with":[75],"current":[76],"methods.":[77],"Motivated":[78],"by":[79,104,158],"this,":[80],"we":[81,161],"introduce":[82],"MuChoMusic,":[83],"benchmark":[85,128],"for":[86,181],"evaluating":[87],"multimodal":[91,183],"focused":[94],"on":[95,107,173],"audio.":[96],"MuChoMusic":[97],"comprises":[98],"1,187":[99],"multiple-choice":[100],"questions,":[101],"all":[102],"validated":[103],"human":[105],"annotators,":[106],"644":[108],"tracks":[110],"sourced":[111],"from":[112],"two":[113],"publicly":[114],"available":[115],"datasets,":[117],"covering":[119],"wide":[121],"genres.":[124],"Questions":[125],"crafted":[130],"knowledge":[133],"reasoning":[135],"abilities":[136],"across":[137],"several":[138,168],"dimensions":[139],"cover":[141],"fundamental":[142],"musical":[143],"concepts":[144],"relation":[147],"cultural":[149],"functional":[151],"contexts.":[152],"Through":[153],"holistic":[155],"analysis":[156],"afforded":[157],"benchmark,":[160],"evaluate":[162],"five":[163],"open-source":[164],"identify":[167],"pitfalls,":[169],"including":[170],"an":[171],"over-reliance":[172],"modality,":[176],"pointing":[177],"need":[180],"better":[182],"integration.":[184],"Data":[185],"code":[187],"open-sourced.":[189]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
