{"id":"https://openalex.org/W7108749125","doi":"https://doi.org/10.5281/zenodo.17811494","title":"User-Guided Generative Source Separation","display_name":"User-Guided Generative Source Separation","publication_year":2025,"publication_date":"2025-09-21","ids":{"openalex":"https://openalex.org/W7108749125","doi":"https://doi.org/10.5281/zenodo.17811494"},"language":null,"primary_location":{"id":"doi:10.5281/zenodo.17811494","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811494","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.17811494","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yutong Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yutong Wen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Minje Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minje Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Paris  Smaragdis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paris  Smaragdis","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.62567132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6912999749183655,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6912999749183655,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.1378999948501587,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.12139999866485596,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6449000239372253},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.6306999921798706},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.5971999764442444},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4878999888896942},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44859999418258667},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.44519999623298645},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.4300999939441681},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4189000129699707}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7347000241279602},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6449000239372253},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.6306999921798706},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.5971999764442444},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5167999863624573},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4878999888896942},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44859999418258667},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.4300999939441681},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4189000129699707},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3986000120639801},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.39739999175071716},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3700999915599823},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.33980000019073486},{"id":"https://openalex.org/C184408114","wikidata":"https://www.wikidata.org/wiki/Q1502022","display_name":"Generative Design","level":3,"score":0.3156000077724457},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.29899999499320984},{"id":"https://openalex.org/C23161992","wikidata":"https://www.wikidata.org/wiki/Q2922301","display_name":"Hum","level":3,"score":0.295199990272522},{"id":"https://openalex.org/C2779182362","wikidata":"https://www.wikidata.org/wiki/Q17126187","display_name":"Session (web analytics)","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.27959999442100525},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.27889999747276306},{"id":"https://openalex.org/C2778924833","wikidata":"https://www.wikidata.org/wiki/Q7064603","display_name":"Novelty detection","level":3,"score":0.2556000053882599},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.17811494","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811494","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.17811494","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811494","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Music":[0],"source":[1],"separation":[2,23,53,143],"(MSS)":[3],"aims":[4],"to":[5,126],"extract":[6],"individual":[7],"instrument":[8,148],"sources":[9],"from":[10],"their":[11],"mixture.":[12],"While":[13],"most":[14],"existing":[15],"methods":[16],"focus":[17],"on":[18,61,95],"the":[19,34,55,77,107,122,151,157,171],"widely":[20],"adopted":[21],"four-stem":[22,56],"setup":[24],"(vocals,":[25],"bass,":[26],"drums,":[27],"and":[28,80,113,130,135,170],"other":[29],"instruments),":[30],"this":[31],"approach":[32],"lacks":[33],"flexibility":[35,112],"needed":[36],"for":[37,88,161],"real-world":[38],"applications.":[39],"To":[40],"address":[41],"this,":[42],"we":[43,116],"propose":[44],"GuideSep,":[45],"a":[46,64,118],"diffusion-based":[47,158],"MSS":[48],"model":[49,124],"capable":[50],"of":[51,153],"instrument-agnostic":[52],"beyond":[54],"setup.":[57],"GuideSep":[58,140],"is":[59,174],"conditioned":[60],"multiple":[62],"inputs:":[63],"waveform":[65],"mimicry":[66],"condition,":[67],"which":[68,84],"can":[69],"be":[70,166],"easily":[71],"provided":[72],"by":[73],"humming":[74],"or":[75,99],"playing":[76],"target":[78],"melody,":[79],"mel-spectrogram":[81],"domain":[82],"masks,":[83],"offer":[85],"additional":[86],"guidance":[87],"separation.":[89],"Unlike":[90],"prior":[91],"approaches":[92],"that":[93,139],"relied":[94],"fixed":[96],"class":[97],"labels":[98],"sound":[100],"queries,":[101],"our":[102],"conditioning":[103],"scheme,":[104],"coupled":[105],"with":[106],"generative":[108,131,159],"approach,":[109],"provides":[110],"greater":[111],"applicability.":[114],"Additionally,":[115],"design":[117],"mask-prediction":[119],"baseline":[120],"using":[121],"same":[123],"architecture":[125],"systematically":[127],"compare":[128],"predictive":[129],"approaches.":[132],"Our":[133,163],"objective":[134],"subjective":[136],"evaluations":[137],"demonstrate":[138],"achieves":[141],"high-quality":[142],"while":[144],"enabling":[145],"more":[146],"versatile":[147],"extraction,":[149],"highlighting":[150],"potential":[152],"user":[154],"participation":[155],"in":[156],"process":[160],"MSS.":[162],"code":[164],"will":[165],"released":[167],"upon":[168],"acceptance,":[169],"demo":[172],"page":[173],"https://reliable-marzipan-458f0e.netlify.app.":[175]},"counts_by_year":[],"updated_date":"2025-12-05T23:25:22.460635","created_date":"2025-12-05T00:00:00"}
