{"id":"https://openalex.org/W4392903252","doi":"https://doi.org/10.1109/icassp48485.2024.10447422","title":"Unsupervised Multi-Channel Separation And Adaptation","display_name":"Unsupervised Multi-Channel Separation And Adaptation","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903252","doi":"https://doi.org/10.1109/icassp48485.2024.10447422"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447422","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447422","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070114472","display_name":"Cong Han","orcid":"https://orcid.org/0000-0003-2121-000X"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Cong Han","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087313939","display_name":"Kevin Wilson","orcid":"https://orcid.org/0000-0001-9141-2219"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Wilson","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053447215","display_name":"Scott Wisdom","orcid":"https://orcid.org/0000-0001-6671-1428"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Wisdom","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112763337","display_name":"John R. Hershey","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John R. Hershey","raw_affiliation_strings":["Google"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5070114472"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":0.9877,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.71987281,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"721","last_page":"725"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7993904948234558},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.6644306182861328},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.6608837842941284},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6169718503952026},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5575893521308899},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5556751489639282},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.49285221099853516},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.49283719062805176},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.4832928478717804},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.43959885835647583},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.4341110289096832},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.43151727318763733},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.429510235786438},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4180770516395569},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.41582193970680237},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.378546804189682},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.28575214743614197},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.1362774670124054}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7993904948234558},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.6644306182861328},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.6608837842941284},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6169718503952026},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5575893521308899},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5556751489639282},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49285221099853516},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.49283719062805176},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.4832928478717804},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.43959885835647583},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.4341110289096832},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.43151727318763733},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.429510235786438},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4180770516395569},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.41582193970680237},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.378546804189682},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.28575214743614197},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.1362774670124054},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447422","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447422","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2113638573","https://openalex.org/W2125336414","https://openalex.org/W2289480995","https://openalex.org/W2460742184","https://openalex.org/W2517616541","https://openalex.org/W2563666542","https://openalex.org/W2589857635","https://openalex.org/W2892163332","https://openalex.org/W2899515918","https://openalex.org/W2933708090","https://openalex.org/W2944972166","https://openalex.org/W2952218014","https://openalex.org/W2963189033","https://openalex.org/W2964058413","https://openalex.org/W2998657200","https://openalex.org/W3004309045","https://openalex.org/W3015279216","https://openalex.org/W3015372568","https://openalex.org/W3015788098","https://openalex.org/W3016244615","https://openalex.org/W3093839391","https://openalex.org/W3099330747","https://openalex.org/W3151596526","https://openalex.org/W3162341667","https://openalex.org/W3206116612","https://openalex.org/W3207360934","https://openalex.org/W4224931722","https://openalex.org/W4296068764","https://openalex.org/W6762182681","https://openalex.org/W6784499681"],"related_works":["https://openalex.org/W3035557009","https://openalex.org/W3204418343","https://openalex.org/W2341113105","https://openalex.org/W2955172689","https://openalex.org/W3132602785","https://openalex.org/W3046182208","https://openalex.org/W2343346879","https://openalex.org/W2186589590","https://openalex.org/W4297818280","https://openalex.org/W772479628"],"abstract_inverted_index":{"A":[0],"key":[1],"challenge":[2],"in":[3,32],"machine":[4],"learning":[5,31,142],"is":[6],"to":[7,12,28,39,117,120],"generalize":[8],"from":[9,54],"training":[10,25,67],"data":[11],"an":[13],"application":[14],"domain":[15],"of":[16,48,102],"interest.":[17],"This":[18],"work":[19],"extends":[20],"the":[21,33,55,114],"recently-proposed":[22],"mixture":[23],"invariant":[24],"(MixIT)":[26],"algorithm":[27],"perform":[29],"unsupervised":[30,66,141],"multi-channel":[34,89,151],"setting.":[35],"We":[36],"use":[37,86],"MixIT":[38,144],"train":[40],"a":[41,87,103,107],"model":[42,104,146],"on":[43,62,72,106,133,148],"far-field":[44],"microphone":[45],"array":[46],"recordings":[47,75],"overlapping":[49,77],"reverberant":[50],"and":[51,65,69,109,119,126],"noisy":[52],"speech":[53,153],"AMI":[56,74,90],"Corpus.":[57],"The":[58],"models":[59,131],"are":[60,70],"trained":[61,132],"both":[63,149],"supervised":[64,130],"data,":[68],"tested":[71],"real":[73,127],"containing":[76],"speech.":[78],"To":[79],"objectively":[80],"evaluate":[81],"our":[82],"models,":[83],"we":[84,97],"also":[85],"synthetic":[88,125,135],"test":[91],"set.":[92],"Holding":[93],"network":[94],"architectures":[95],"constant,":[96],"find":[98],"that":[99,140],"semi-supervised":[100],"fine-tuning":[101],"pretrained":[105],"large":[108],"diverse":[110],"single-channel":[111],"dataset":[112],"yields":[113],"largest":[115],"improvement":[116],"SI-SNR":[118],"human":[121],"listening":[122],"ratings":[123],"across":[124],"datasets,":[128],"outperforming":[129],"well-matched":[134],"data.":[136],"Our":[137],"results":[138],"demonstrate":[139],"through":[143],"enables":[145],"adaptation":[147],"single-and":[150],"real-world":[152],"recordings.":[154]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
