{"id":"https://openalex.org/W4392903555","doi":"https://doi.org/10.1109/icassp48485.2024.10446601","title":"GASS: Generalizing Audio Source Separation with Large-Scale Data","display_name":"GASS: Generalizing Audio Source Separation with Large-Scale Data","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903555","doi":"https://doi.org/10.1109/icassp48485.2024.10446601"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446601","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446601","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103137809","display_name":"Jordi Pons","orcid":"https://orcid.org/0000-0001-9603-0869"},"institutions":[{"id":"https://openalex.org/I116121188","display_name":"Dolby (Netherlands)","ror":"https://ror.org/02kb3q578","country_code":"NL","type":"company","lineage":["https://openalex.org/I116121188","https://openalex.org/I4210093996"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Jordi Pons","raw_affiliation_strings":["Dolby Laboratories"],"affiliations":[{"raw_affiliation_string":"Dolby Laboratories","institution_ids":["https://openalex.org/I116121188"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100455531","display_name":"Xiaoyu Liu","orcid":"https://orcid.org/0009-0005-6402-3855"},"institutions":[{"id":"https://openalex.org/I116121188","display_name":"Dolby (Netherlands)","ror":"https://ror.org/02kb3q578","country_code":"NL","type":"company","lineage":["https://openalex.org/I116121188","https://openalex.org/I4210093996"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Xiaoyu Liu","raw_affiliation_strings":["Dolby Laboratories"],"affiliations":[{"raw_affiliation_string":"Dolby Laboratories","institution_ids":["https://openalex.org/I116121188"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101557983","display_name":"Santiago Pascual","orcid":"https://orcid.org/0000-0002-8365-7387"},"institutions":[{"id":"https://openalex.org/I116121188","display_name":"Dolby (Netherlands)","ror":"https://ror.org/02kb3q578","country_code":"NL","type":"company","lineage":["https://openalex.org/I116121188","https://openalex.org/I4210093996"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Santiago Pascual","raw_affiliation_strings":["Dolby Laboratories"],"affiliations":[{"raw_affiliation_string":"Dolby Laboratories","institution_ids":["https://openalex.org/I116121188"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103092198","display_name":"Joan Serr\u00e0","orcid":"https://orcid.org/0000-0003-1303-6558"},"institutions":[{"id":"https://openalex.org/I116121188","display_name":"Dolby (Netherlands)","ror":"https://ror.org/02kb3q578","country_code":"NL","type":"company","lineage":["https://openalex.org/I116121188","https://openalex.org/I4210093996"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Joan Serr\u00e0","raw_affiliation_strings":["Dolby Laboratories"],"affiliations":[{"raw_affiliation_string":"Dolby Laboratories","institution_ids":["https://openalex.org/I116121188"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103137809"],"corresponding_institution_ids":["https://openalex.org/I116121188"],"apc_list":null,"apc_paid":null,"fwci":5.6255,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.9664311,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"546","last_page":"550"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7993944883346558},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.7489787340164185},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6231549382209778},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5827438235282898},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.5626829862594604},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5403611063957214},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5112998485565186},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.45815810561180115},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40297016501426697},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.29582497477531433},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09093096852302551}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7993944883346558},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.7489787340164185},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6231549382209778},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5827438235282898},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.5626829862594604},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5403611063957214},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5112998485565186},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.45815810561180115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40297016501426697},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29582497477531433},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09093096852302551},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446601","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446601","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2086381917","https://openalex.org/W2734774145","https://openalex.org/W2962865004","https://openalex.org/W2963992487","https://openalex.org/W2964058413","https://openalex.org/W2972541922","https://openalex.org/W2990666817","https://openalex.org/W2998490864","https://openalex.org/W2998657200","https://openalex.org/W3001377302","https://openalex.org/W3027008958","https://openalex.org/W3094550259","https://openalex.org/W3127686677","https://openalex.org/W3161934504","https://openalex.org/W3168140565","https://openalex.org/W3191469971","https://openalex.org/W3205879560","https://openalex.org/W4205689591","https://openalex.org/W4224871700","https://openalex.org/W4232282348","https://openalex.org/W4289665794","https://openalex.org/W4372260250","https://openalex.org/W4372269936","https://openalex.org/W4375928773","https://openalex.org/W4376632781","https://openalex.org/W4385822305","https://openalex.org/W6607890289","https://openalex.org/W6757202746","https://openalex.org/W6777776875","https://openalex.org/W6784499681","https://openalex.org/W6844662008","https://openalex.org/W6847370424","https://openalex.org/W6852871851","https://openalex.org/W6863079419"],"related_works":["https://openalex.org/W3162204513","https://openalex.org/W2071676784","https://openalex.org/W4292513318","https://openalex.org/W2371138613","https://openalex.org/W2048963458","https://openalex.org/W43109613","https://openalex.org/W2359952343","https://openalex.org/W2239445980","https://openalex.org/W4308092240","https://openalex.org/W2077498359"],"abstract_inverted_index":{"Universal":[0],"source":[1,31,64],"separation":[2,32,65,114,156],"targets":[3],"at":[4],"separating":[5,129],"the":[6,14,27,99,105,146,154],"audio":[7,63],"sources":[8],"of":[9,29,92,101],"an":[10],"arbitrary":[11],"mix,":[12],"removing":[13],"constraint":[15],"to":[16,69,126],"operate":[17],"on":[18,40,88,140],"a":[19,60,77,81,89],"specific":[20],"domain":[21],"like":[22],"speech":[23,113],"or":[24],"music.":[25],"Yet,":[26,119],"potential":[28,53],"universal":[30],"is":[33,121],"limited":[34],"because":[35],"most":[36],"existing":[37],"works":[38],"focus":[39],"mixes":[41],"with":[42,80],"predominantly":[43],"sound":[44,74,110],"events,":[45],"and":[46,73,104,112,132,143],"small":[47],"training":[48],"datasets":[49],"also":[50,136],"limit":[51],"its":[52,116],"for":[54,123,128],"supervised":[55,78],"learning.":[56],"Here,":[57],"we":[58],"study":[59],"single":[61],"general":[62],"(GASS)":[66],"model":[67],"trained":[68],"separate":[70],"speech,":[71],"music,":[72],"events":[75],"in":[76,109,161],"fashion":[79],"large-scale":[82],"dataset.":[83],"We":[84,135],"assess":[85],"GASS":[86,102,124,138],"models":[87,125,139,152],"diverse":[90],"set":[91],"tasks.":[93],"Our":[94],"strong":[95],"in-distribution":[96],"results":[97,160],"show":[98],"feasibility":[100],"models,":[103],"competitive":[106],"out-of-distribution":[107,130],"performance":[108],"event":[111],"shows":[115],"generalization":[117],"abilities.":[118],"it":[120],"challenging":[122],"generalize":[127],"cinematic":[131],"music":[133,155],"content.":[134],"fine-tune":[137],"each":[141],"dataset":[142],"consistently":[144],"outperform":[145],"ones":[147],"without":[148],"pre-training.":[149],"All":[150],"fine-tuned":[151],"(except":[153],"one)":[157],"obtain":[158],"state-of-the-art":[159],"their":[162],"respective":[163],"benchmarks.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
