{"id":"https://openalex.org/W3168140565","doi":"https://doi.org/10.1109/waspaa52581.2021.9632714","title":"Sparse, Efficient, and Semantic Mixture Invariant Training: Taming In-the-Wild Unsupervised Sound Separation","display_name":"Sparse, Efficient, and Semantic Mixture Invariant Training: Taming In-the-Wild Unsupervised Sound Separation","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3168140565","doi":"https://doi.org/10.1109/waspaa52581.2021.9632714","mag":"3168140565"},"language":"en","primary_location":{"id":"doi:10.1109/waspaa52581.2021.9632714","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa52581.2021.9632714","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053447215","display_name":"Scott Wisdom","orcid":"https://orcid.org/0000-0001-6671-1428"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Scott Wisdom","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103622427","display_name":"Aren Jansen","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aren Jansen","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103273436","display_name":"Ron J. Weiss","orcid":"https://orcid.org/0000-0003-2010-4053"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ron J. Weiss","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065994318","display_name":"Hakan Erdo\u011fan","orcid":"https://orcid.org/0000-0003-3140-8642"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hakan Erdogan","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112763337","display_name":"John R. Hershey","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John R. Hershey","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5053447215"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":2.30367241,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.8975932,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"51","last_page":"55"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10822","display_name":"Acoustic Wave Phenomena Research","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6711589097976685},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.6344816088676453},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.6070288419723511},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5148190855979919},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.48996391892433167},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4575272500514984},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.45063796639442444},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.44814205169677734},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4438932538032532},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.42577847838401794},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41812416911125183},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3946344554424286},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35357552766799927},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22779381275177002}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6711589097976685},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.6344816088676453},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.6070288419723511},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5148190855979919},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.48996391892433167},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4575272500514984},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45063796639442444},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.44814205169677734},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4438932538032532},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.42577847838401794},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41812416911125183},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3946344554424286},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35357552766799927},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22779381275177002},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/waspaa52581.2021.9632714","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa52581.2021.9632714","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1897240248","https://openalex.org/W1987075379","https://openalex.org/W2031647436","https://openalex.org/W2118718620","https://openalex.org/W2135046866","https://openalex.org/W2138019504","https://openalex.org/W2166159048","https://openalex.org/W2221409856","https://openalex.org/W2222512263","https://openalex.org/W2250384498","https://openalex.org/W2317475341","https://openalex.org/W2398826216","https://openalex.org/W2460742184","https://openalex.org/W2593116425","https://openalex.org/W2962715207","https://openalex.org/W2963189033","https://openalex.org/W2964058413","https://openalex.org/W2964121744","https://openalex.org/W2990666817","https://openalex.org/W2998657200","https://openalex.org/W3027008958","https://openalex.org/W3093839391","https://openalex.org/W3100954996","https://openalex.org/W3151596526","https://openalex.org/W3161934504","https://openalex.org/W6631190155","https://openalex.org/W6677759377","https://openalex.org/W6734260513","https://openalex.org/W6777776875","https://openalex.org/W6784499681"],"related_works":["https://openalex.org/W1529840045","https://openalex.org/W4244036394","https://openalex.org/W2135107501","https://openalex.org/W1842879116","https://openalex.org/W2047248895","https://openalex.org/W1822895636","https://openalex.org/W2077498359","https://openalex.org/W1999699871","https://openalex.org/W4225124612","https://openalex.org/W2043806667"],"abstract_inverted_index":{"Supervised":[0],"neural":[1],"network":[2],"training":[3,39,43],"has":[4],"led":[5],"to":[6,24,60,189],"significant":[7],"progress":[8,31],"on":[9,16,32,44],"single-channel":[10],"sound":[11],"separation.":[12],"This":[13],"approach":[14],"relies":[15],"ground":[17],"truth":[18],"isolated":[19],"sources,":[20,138,178],"which":[21,58],"precludes":[22],"scaling":[23],"widely":[25],"available":[26],"mixture":[27,37],"data":[28],"and":[29,109,164],"limits":[30,81],"open-domain":[33],"tasks.":[34],"The":[35,168],"recent":[36],"invariant":[38],"(MixIT)":[40],"method":[41],"enables":[42],"in-the-wild":[45],"data;":[46],"however,":[47],"it":[48,55],"suffers":[49],"from":[50],"two":[51],"outstanding":[52],"problems.":[53],"First,":[54],"produces":[56],"models":[57],"tend":[59],"over-separate,":[61],"producing":[62],"more":[63],"output":[64,86,107,177],"sources":[65,108],"than":[66],"are":[67],"present":[68],"in":[69,202],"the":[70,73,78,82,151,159,193],"input.":[71],"Second,":[72],"exponential":[74],"computational":[75],"complexity":[76],"of":[77,84,137,176],"MixIT":[79,152,183],"loss":[80,112,124],"number":[83],"feasible":[85],"sources.":[87],"In":[88],"this":[89],"paper":[90],"we":[91,98,139,197],"address":[92],"both":[93],"issues.":[94],"To":[95,133],"combat":[96],"over-separation":[97,163],"introduce":[99,140],"new":[100],"losses:":[101],"sparsity":[102,187],"losses":[103,161,188],"that":[104,113,158],"favor":[105],"fewer":[106],"a":[110,121,145],"covariance":[111],"discourages":[114],"correlated":[115],"outputs.":[116],"We":[117],"also":[118],"experiment":[119],"with":[120,186],"semantic":[122],"classification":[123],"by":[125,180,211],"predicting":[126],"weak":[127],"class":[128],"labels":[129],"for":[130],"each":[131],"mixture.":[132],"handle":[134],"larger":[135,174],"numbers":[136,175],"an":[141],"efficient":[142,182],"approximation":[143],"using":[144,173],"fast":[146],"least-squares":[147],"solution,":[148],"projected":[149],"onto":[150],"constraint":[153],"set.":[154],"Our":[155],"experiments":[156],"show":[157],"proposed":[160],"curtail":[162],"improve":[165],"overall":[166],"performance.":[167],"best":[169],"performance":[170],"is":[171],"achieved":[172],"enabled":[179],"our":[181],"loss,":[184],"combined":[185],"prevent":[190],"over-separation.":[191],"On":[192],"FUSS":[194],"test":[195],"set,":[196],"achieve":[198],"over":[199,212],"13":[200],"dB":[201],"multi-source":[203],"SI-SNR":[204,210],"improvement,":[205],"while":[206],"boosting":[207],"single-source":[208],"reconstruction":[209],"17":[213],"dB.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":6}],"updated_date":"2025-11-25T21:42:39.735039","created_date":"2025-10-10T00:00:00"}
