{"id":"https://openalex.org/W4416251073","doi":"https://doi.org/10.1109/waspaa66052.2025.11230942","title":"Musical Source Separation Bake-Off: Comparing Objective Metrics with Human Perception","display_name":"Musical Source Separation Bake-Off: Comparing Objective Metrics with Human Perception","publication_year":2025,"publication_date":"2025-10-12","ids":{"openalex":"https://openalex.org/W4416251073","doi":"https://doi.org/10.1109/waspaa66052.2025.11230942"},"language":null,"primary_location":{"id":"doi:10.1109/waspaa66052.2025.11230942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa66052.2025.11230942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119790555","display_name":"Noah Jaffe","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Noah Jaffe","raw_affiliation_strings":["Institute for Logic, Language, and Computation, University of Amsterdam,The Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute for Logic, Language, and Computation, University of Amsterdam,The Netherlands","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061336585","display_name":"John Burgoyne","orcid":"https://orcid.org/0000-0001-6854-5646"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"John Ashley Burgoyne","raw_affiliation_strings":["Institute for Logic, Language, and Computation, University of Amsterdam,The Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute for Logic, Language, and Computation, University of Amsterdam,The Netherlands","institution_ids":["https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5119790555"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.45389178,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.8481000065803528,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.8481000065803528,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.07209999859333038,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12759","display_name":"Vehicle Noise and Vibration Control","score":0.0272000003606081,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5857999920845032},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5773000121116638},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.545199990272522},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.47929999232292175},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4250999987125397},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.34610000252723694},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.3188999891281128}],"concepts":[{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5857999920845032},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5773000121116638},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.545199990272522},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5382999777793884},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.506600022315979},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.47929999232292175},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4250999987125397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41190001368522644},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.34610000252723694},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.3188999891281128},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.27959999442100525},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2782000005245209},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C2985200191","wikidata":"https://www.wikidata.org/wiki/Q351087","display_name":"Pitch perception","level":3,"score":0.2669000029563904},{"id":"https://openalex.org/C3019940508","wikidata":"https://www.wikidata.org/wiki/Q185957","display_name":"Perceived quality","level":3,"score":0.259799987077713},{"id":"https://openalex.org/C128422554","wikidata":"https://www.wikidata.org/wiki/Q20077126","display_name":"Sound recording and reproduction","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C2639959","wikidata":"https://www.wikidata.org/wiki/Q1344778","display_name":"Distance measures","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/waspaa66052.2025.11230942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa66052.2025.11230942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2022668263","https://openalex.org/W2105921478","https://openalex.org/W2127851351","https://openalex.org/W2395718496","https://openalex.org/W2526050071","https://openalex.org/W2565807767","https://openalex.org/W2799412098","https://openalex.org/W2963992487","https://openalex.org/W2964058413","https://openalex.org/W2972411915","https://openalex.org/W3145029257","https://openalex.org/W3209059054","https://openalex.org/W4225261970","https://openalex.org/W4372260310","https://openalex.org/W4392902957","https://openalex.org/W4392909836","https://openalex.org/W4402112400","https://openalex.org/W4408352395"],"related_works":[],"abstract_inverted_index":{"Music":[0],"source":[1,188],"separation":[2],"aims":[3],"to":[4,196],"extract":[5],"individual":[6],"sound":[7],"sources":[8],"(e.g.,":[9],"vocals,":[10],"drums,":[11],"guitar)":[12],"from":[13,63],"a":[14,48],"mixed":[15],"music":[16],"recording.":[17],"However,":[18,151],"evaluating":[19],"the":[20,32,53,97,107,127,154,170],"quality":[21,185],"of":[22,136,153],"separated":[23],"audio":[24],"remains":[25,96],"challenging,":[26],"as":[27],"commonly":[28],"used":[29],"metrics":[30,147],"like":[31],"source-to-distortion":[33],"ratio":[34,110],"(SDR)":[35],"do":[36],"not":[37],"always":[38],"align":[39],"with":[40,126,161],"human":[41,162],"perception.":[42],"In":[43],"this":[44],"study,":[45],"we":[46],"conducted":[47],"large-scale":[49],"listener":[50,66,114,194],"evaluation":[51,174],"on":[52],"MUSDB18":[54],"test":[55],"set,":[56],"collecting":[57],"approximately":[58],"30":[59],"ratings":[60,115,195],"per":[61],"track":[62],"seven":[64],"distinct":[65],"groups.":[67],"We":[68,190],"compared":[69],"several":[70],"objective":[71],"energy-ratio":[72],"metrics,":[73,156],"including":[74,157],"legacy":[75],"measures":[76],"(BSSEval":[77],"v4,":[78],"SI-SDR":[79],"variants),":[80],"and":[81,92,118,140,176,199],"embedding-based":[82,155],"alternatives":[83],"(Fr\u00e9chet":[84],"Audio":[85,122],"Distance":[86,123],"using":[87],"CLAP-LAION-music,":[88],"EnCodec,":[89],"VGGish,":[90],"Wave2Vec2,":[91],"HuBERT).":[93],"While":[94],"SDR":[95],"best-performing":[98],"metric":[99,181],"for":[100,116,138,142,148,164,172],"vocal":[101,165],"estimates,":[102],"our":[103,192],"results":[104],"show":[105],"that":[106,178],"scale-invariant":[108],"signal-to-artifacts":[109],"(SI-SAR)":[111],"better":[112],"predicts":[113],"drums":[117,139],"bass":[119],"stems.":[120,150],"Fr\u00e9chet":[121],"(FAD)":[124],"computed":[125],"CLAP-LAION-music":[128],"embedding":[129],"also":[130],"performs":[131],"competitively\u2014achieving":[132],"Kendall's":[133],"\u03c4":[134],"values":[135],"0.25":[137],"0.19":[141],"bass\u2014matching":[143],"or":[144],"surpassing":[145],"energy-based":[146],"those":[149],"none":[152],"CLAP,":[158],"correlate":[159],"positively":[160],"perception":[163],"estimates.":[166],"These":[167],"findings":[168],"highlight":[169],"need":[171],"stem-specific":[173],"strategies":[175],"suggest":[177],"no":[179],"single":[180],"reliably":[182],"reflects":[183],"perceptual":[184],"across":[186],"all":[187],"types.":[189],"release":[191],"raw":[193],"support":[197],"reproducibility":[198],"further":[200],"research.":[201]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
