{"id":"https://openalex.org/W6893245335","doi":"https://doi.org/10.5281/zenodo.14877513","title":"A Stem-Agnostic Single-Decoder System for Music Source Separation Beyond Four Stems","display_name":"A Stem-Agnostic Single-Decoder System for Music Source Separation Beyond Four Stems","publication_year":2024,"publication_date":"2024-11-10","ids":{"openalex":"https://openalex.org/W6893245335","doi":"https://doi.org/10.5281/zenodo.14877513"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.14877513","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877513","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.14877513","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Karn N. Watcharasupat","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Karn N. Watcharasupat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Alexander Lerch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexander Lerch","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37605739,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6531000137329102,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6531000137329102,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.2556000053882599,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.05249999836087227,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.8264999985694885},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.633899986743927},{"id":"https://openalex.org/keywords/blind-signal-separation","display_name":"Blind signal separation","score":0.48980000615119934},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.3695000112056732},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.36469998955726624},{"id":"https://openalex.org/keywords/secondary-source","display_name":"Secondary source","score":0.32280001044273376}],"concepts":[{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.8264999985694885},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6703000068664551},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.633899986743927},{"id":"https://openalex.org/C120317606","wikidata":"https://www.wikidata.org/wiki/Q17105967","display_name":"Blind signal separation","level":3,"score":0.48980000615119934},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.3695000112056732},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.36469998955726624},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.34940001368522644},{"id":"https://openalex.org/C82979123","wikidata":"https://www.wikidata.org/wiki/Q905511","display_name":"Secondary source","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3154999911785126},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.29840001463890076},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2797999978065491},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C76214141","wikidata":"https://www.wikidata.org/wiki/Q2465506","display_name":"Separation of concerns","level":3,"score":0.25369998812675476},{"id":"https://openalex.org/C2777814067","wikidata":"https://www.wikidata.org/wiki/Q1752317","display_name":"Tandem","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.14877513","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877513","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.14877513","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877513","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"significant":[1],"recent":[2],"progress":[3],"across":[4],"multiple":[5,92],"sub-tasks":[6],"of":[7,58,74,91,155,172],"audio":[8],"source":[9,13,36,89,100],"separation,":[10],"few":[11,31],"music":[12,115],"separation":[14,17,37,90,101,154],"systems":[15,33,66,76],"support":[16,35,53,62],"beyond":[18,38],"the":[19,29,121,140,153,170],"four-stem":[20],"vocals,":[21],"drums,":[22],"bass,":[23],"and":[24,164,179],"other":[25],"(VDBO)":[26],"setup.":[27],"Of":[28],"very":[30],"current":[32],"that":[34,50,87],"this":[39],"setup,":[40],"most":[41],"continue":[42],"to":[43,105,169],"rely":[44],"on":[45,134],"an":[46],"inflexible":[47,65],"decoder":[48],"setup":[49,110,150],"can":[51,165],"only":[52,127],"a":[54,85,108,114],"fixed":[55],"pre-defined":[56],"set":[57],"stems.":[59],"Increasing":[60],"stem":[61],"in":[63,107,111],"these":[64,75],"correspondingly":[67],"requires":[68],"increasing":[69],"computational":[70],"complexity,":[71],"rendering":[72],"extensions":[73],"computationally":[77],"infeasible":[78],"for":[79,152],"long-tail":[80],"instruments.":[81],"We":[82],"propose":[83],"Banquet,":[84],"system":[86],"allows":[88,151],"stems":[93,175],"using":[94],"just":[95],"one":[96],"decoder.":[97],"A":[98],"bandsplit":[99],"model":[102],"is":[103],"extended":[104],"work":[106],"query-based":[109,149],"tandem":[112],"with":[113,136],"instrument":[116,157],"recognition":[117],"PaSST":[118],"model.":[119],"On":[120],"MoisesDB":[122],"dataset,":[123],"Banquet":[124],"\u2014":[125,132],"at":[126],"24.9":[128],"M":[129],"trainable":[130],"parameters":[131],"performed":[133],"par":[135],"or":[137],"better":[138],"than":[139],"significantly":[141],"more":[142],"complex":[143],"6-stem":[144],"Hybrid":[145],"Transformer":[146],"Demucs.":[147],"The":[148],"narrow":[156],"classes":[158],"such":[159,176],"as":[160,177],"clean":[161],"acoustic":[162],"guitars,":[163],"be":[166],"successfully":[167],"applied":[168],"extraction":[171],"less":[173],"common":[174],"reeds":[178],"organs.":[180]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
