{"id":"https://openalex.org/W3205879560","doi":"https://doi.org/10.1109/icassp43922.2022.9746005","title":"The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks","display_name":"The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3205879560","doi":"https://doi.org/10.1109/icassp43922.2022.9746005","mag":"3205879560"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746005","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746005","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018337836","display_name":"Darius Petermann","orcid":"https://orcid.org/0000-0002-5973-5752"},"institutions":[{"id":"https://openalex.org/I4210119109","display_name":"Indiana University Bloomington","ror":"https://ror.org/02k40bc56","country_code":"US","type":"education","lineage":["https://openalex.org/I4210119109","https://openalex.org/I592451"]},{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Darius Petermann","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","Department of Intelligent Systems Engineering, Indiana University, Bloomington, IN, USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Department of Intelligent Systems Engineering, Indiana University, Bloomington, IN, USA","institution_ids":["https://openalex.org/I4210119109"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086940921","display_name":"Gordon Wichern","orcid":"https://orcid.org/0000-0002-8597-6795"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gordon Wichern","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101607498","display_name":"Zhong-Qiu Wang","orcid":"https://orcid.org/0000-0002-4204-9430"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhong-Qiu Wang","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076453358","display_name":"Jonathan Le Roux","orcid":"https://orcid.org/0000-0002-3451-171X"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan Le Roux","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018337836"],"corresponding_institution_ids":["https://openalex.org/I4210119109","https://openalex.org/I4210159266"],"apc_list":null,"apc_paid":null,"fwci":3.682,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.95007899,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"526","last_page":"530"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6919685006141663},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6836264133453369},{"id":"https://openalex.org/keywords/loudness","display_name":"Loudness","score":0.6133681535720825},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.5947592854499817},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4836910367012024},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4700269401073456},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.46608206629753113},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.4274210035800934},{"id":"https://openalex.org/keywords/audio-analyzer","display_name":"Audio analyzer","score":0.425004780292511},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4209582507610321},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.31388211250305176},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.29748231172561646},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.2517605125904083},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.22891110181808472},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20028293132781982},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09154075384140015}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6919685006141663},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6836264133453369},{"id":"https://openalex.org/C79018884","wikidata":"https://www.wikidata.org/wiki/Q622324","display_name":"Loudness","level":2,"score":0.6133681535720825},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.5947592854499817},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4836910367012024},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4700269401073456},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.46608206629753113},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.4274210035800934},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.425004780292511},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4209582507610321},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.31388211250305176},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.29748231172561646},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.2517605125904083},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.22891110181808472},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20028293132781982},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09154075384140015},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746005","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746005","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4399999976158142,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1482149378","https://openalex.org/W1494198834","https://openalex.org/W1971168548","https://openalex.org/W1991139021","https://openalex.org/W2127851351","https://openalex.org/W2133049755","https://openalex.org/W2153331367","https://openalex.org/W2221409856","https://openalex.org/W2408744528","https://openalex.org/W2580221632","https://openalex.org/W2584992898","https://openalex.org/W2593116425","https://openalex.org/W2766607545","https://openalex.org/W2885307078","https://openalex.org/W2895807593","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W2963358591","https://openalex.org/W2963631056","https://openalex.org/W2964058413","https://openalex.org/W2968951209","https://openalex.org/W2972411915","https://openalex.org/W2972541922","https://openalex.org/W2982456909","https://openalex.org/W2984935418","https://openalex.org/W2990666817","https://openalex.org/W2997987128","https://openalex.org/W2998490864","https://openalex.org/W2998657200","https://openalex.org/W3016232124","https://openalex.org/W3090388844","https://openalex.org/W3095263845","https://openalex.org/W3095738461","https://openalex.org/W3097906045","https://openalex.org/W3099330747","https://openalex.org/W3104704316","https://openalex.org/W3160050861","https://openalex.org/W3160076412","https://openalex.org/W3169030202","https://openalex.org/W3179030250","https://openalex.org/W3185389098","https://openalex.org/W4205689591","https://openalex.org/W4210493250","https://openalex.org/W4298310324","https://openalex.org/W6732646663","https://openalex.org/W6766320909","https://openalex.org/W6769802879","https://openalex.org/W6783462664"],"related_works":["https://openalex.org/W2892072280","https://openalex.org/W2996518094","https://openalex.org/W1260776272","https://openalex.org/W2141742336","https://openalex.org/W4251126290","https://openalex.org/W2065816364","https://openalex.org/W4389228371","https://openalex.org/W2029008115","https://openalex.org/W1967861272","https://openalex.org/W2106612477"],"abstract_inverted_index":{"The":[0],"cocktail":[1,95],"party":[2],"problem":[3],"aims":[4],"at":[5,144],"isolating":[6],"any":[7],"source":[8,21,136,150,173],"of":[9,61,85,135,167,170,184],"interest":[10],"within":[11],"a":[12,82,158],"complex":[13],"acoustic":[14,168],"scene,":[15],"and":[16,64,72,98,102,138,141,155,193],"has":[17,76],"long":[18],"inspired":[19],"audio":[20,51,118],"separation":[22,151],"research.":[23],"Recent":[24],"efforts":[25],"have":[26],"mainly":[27],"focused":[28],"on":[29,109,153],"separating":[30,49],"speech":[31,34],"from":[32,35,39,45,115],"noise,":[33],"speech,":[36,62,192],"musical":[37],"instruments":[38],"each":[40,46],"other,":[41],"or":[42],"sound":[43,65,74,197],"events":[44],"other.":[47],"However,":[48],"an":[50],"mixture":[52,183],"(e.g.,":[53],"movie":[54],"soundtrack)":[55],"into":[56],"the":[57,94,100,165,171,182],"three":[58,116,172],"broad":[59],"categories":[60],"music,":[63,188],"effects":[66],"(understood":[67],"to":[68,106,125,129,162],"include":[69],"ambient":[70],"noise":[71],"natural":[73],"events)":[75],"been":[77],"left":[78],"largely":[79],"unexplored,":[80],"despite":[81],"wide":[83],"range":[84],"potential":[86],"applications.":[87],"This":[88],"paper":[89],"formalizes":[90],"this":[91,110],"task":[92],"as":[93],"fork":[96],"problem,":[97],"presents":[99],"Divide":[101],"Remaster":[103],"(DnR)":[104],"dataset":[105],"foster":[107],"research":[108],"topic.":[111],"DnR":[112],"is":[113],"built":[114],"well-established":[117],"datasets":[119],"(LibriSpeech,":[120],"FMA,":[121],"FSD50k),":[122],"taking":[123],"care":[124],"reproduce":[126],"conditions":[127],"similar":[128],"professionally":[130],"produced":[131],"content":[132],"in":[133],"terms":[134],"overlap":[137],"relative":[139],"loudness,":[140],"made":[142],"available":[143],"CD":[145],"quality.":[146],"We":[147],"benchmark":[148],"standard":[149],"algorithms":[152],"DnR,":[154],"further":[156],"introduce":[157],"new":[159],"multi-resolution":[160],"model":[161,177],"better":[163],"address":[164],"variety":[166],"characteristics":[169],"types.":[174],"Our":[175],"best":[176],"produces":[178],"SI-SDR":[179],"improvements":[180],"over":[181],"11.0":[185],"dB":[186,190,195],"for":[187,191,196],"11.2":[189],"10.8":[194],"effects.":[198]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":2}],"updated_date":"2026-02-21T06:11:54.161237","created_date":"2025-10-10T00:00:00"}
