{"id":"https://openalex.org/W3003441391","doi":"https://doi.org/10.1109/icassp40776.2020.9053989","title":"Channel-Attention Dense U-Net for Multichannel Speech Enhancement","display_name":"Channel-Attention Dense U-Net for Multichannel Speech Enhancement","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3003441391","doi":"https://doi.org/10.1109/icassp40776.2020.9053989","mag":"3003441391"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053989","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053989","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2001.11542","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088730122","display_name":"Bahareh Tolooshams","orcid":"https://orcid.org/0000-0002-5955-6535"},"institutions":[{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bahareh Tolooshams","raw_affiliation_strings":["School of Engineering and Applied Sciences, Harvard University, Cambridge, MA","Harvard University, School of Engineering and Applied Sciences, Cambridge, MA"],"affiliations":[{"raw_affiliation_string":"School of Engineering and Applied Sciences, Harvard University, Cambridge, MA","institution_ids":["https://openalex.org/I136199984"]},{"raw_affiliation_string":"Harvard University, School of Engineering and Applied Sciences, Cambridge, MA","institution_ids":["https://openalex.org/I136199984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091775011","display_name":"Ritwik Giri","orcid":"https://orcid.org/0000-0002-8599-3229"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ritwik Giri","raw_affiliation_strings":["Amazon Web Services, Palo Alto, CA","[Amazon Web Services, Palo Alto, CA]"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, Palo Alto, CA","institution_ids":[]},{"raw_affiliation_string":"[Amazon Web Services, Palo Alto, CA]","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066569995","display_name":"Andrew H. Song","orcid":"https://orcid.org/0000-0001-9356-9156"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew H. Song","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, MA","Massachusetts Institute of Technology, Cambridge, Ma#TAB#"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, Ma#TAB#","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036092393","display_name":"Umut Isik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Umut Isik","raw_affiliation_strings":["Amazon Web Services, Palo Alto, CA","[Amazon Web Services, Palo Alto, CA]"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, Palo Alto, CA","institution_ids":[]},{"raw_affiliation_string":"[Amazon Web Services, Palo Alto, CA]","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112751528","display_name":"Arvindh Krishnaswamy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arvindh Krishnaswamy","raw_affiliation_strings":["Amazon Web Services, Palo Alto, CA","[Amazon Web Services, Palo Alto, CA]"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, Palo Alto, CA","institution_ids":[]},{"raw_affiliation_string":"[Amazon Web Services, Palo Alto, CA]","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5088730122"],"corresponding_institution_ids":["https://openalex.org/I136199984"],"apc_list":null,"apc_paid":null,"fwci":1.2182,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.78508772,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"836","last_page":"840"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7583439350128174},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7175572514533997},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6335873603820801},{"id":"https://openalex.org/keywords/beamforming","display_name":"Beamforming","score":0.6149532794952393},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.6001210808753967},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.587748110294342},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5623210668563843},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5618894100189209},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5022060871124268},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5017662048339844},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4859722852706909},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32852715253829956},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.12594765424728394},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.07688787579536438}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7583439350128174},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7175572514533997},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6335873603820801},{"id":"https://openalex.org/C54197355","wikidata":"https://www.wikidata.org/wiki/Q5782992","display_name":"Beamforming","level":2,"score":0.6149532794952393},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.6001210808753967},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.587748110294342},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5623210668563843},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5618894100189209},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5022060871124268},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5017662048339844},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4859722852706909},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32852715253829956},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.12594765424728394},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.07688787579536438},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053989","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053989","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2001.11542","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.11542","pdf_url":"https://arxiv.org/pdf/2001.11542","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2001.11542","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2001.11542","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.17023/zx4v-bd38","is_oa":true,"landing_page_url":"https://doi.org/10.17023/zx4v-bd38","pdf_url":null,"source":{"id":"https://openalex.org/S7407051697","display_name":"IEEE RESOURCE CENTERS","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:3003441391","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2001.11542","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.11542","pdf_url":"https://arxiv.org/pdf/2001.11542","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3003441391.pdf","grobid_xml":"https://content.openalex.org/works/W3003441391.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2061074721","https://openalex.org/W2127851351","https://openalex.org/W2133564696","https://openalex.org/W2289394825","https://openalex.org/W2291877678","https://openalex.org/W2398042854","https://openalex.org/W2413794162","https://openalex.org/W2517616541","https://openalex.org/W2805288670","https://openalex.org/W2892163332","https://openalex.org/W2900381824","https://openalex.org/W2900893004","https://openalex.org/W2923728956","https://openalex.org/W2937581095","https://openalex.org/W2944972166","https://openalex.org/W2950893734","https://openalex.org/W2962866211","https://openalex.org/W2963446712","https://openalex.org/W2963750251","https://openalex.org/W2975955714","https://openalex.org/W2998678989","https://openalex.org/W6679434410","https://openalex.org/W6726313835","https://openalex.org/W6751512325","https://openalex.org/W6752378368","https://openalex.org/W6757632829","https://openalex.org/W6762182681"],"related_works":["https://openalex.org/W3015791598","https://openalex.org/W2766930522","https://openalex.org/W3205770165","https://openalex.org/W3206809722","https://openalex.org/W3202198001","https://openalex.org/W3092211318","https://openalex.org/W3138270850","https://openalex.org/W2946362479","https://openalex.org/W3022234800","https://openalex.org/W2944972166","https://openalex.org/W2994323562","https://openalex.org/W3030809813","https://openalex.org/W2962981131","https://openalex.org/W2726957814","https://openalex.org/W3209472002","https://openalex.org/W3096571892","https://openalex.org/W2963622214","https://openalex.org/W3164401850","https://openalex.org/W2986582408","https://openalex.org/W2765654763"],"abstract_inverted_index":{"Supervised":[0],"deep":[1,13,76,122],"learning":[2,14,20],"has":[3],"gained":[4],"significant":[5],"attention":[6],"for":[7,87],"speech":[8],"enhancement":[9],"recently.":[10],"The":[11],"state-of-the-art":[12,166],"methods":[15,61],"perform":[16,153],"the":[17,28,31,36,40,44,53,57,66,75,106,109,121,136,147,150,158,162,165,169],"task":[18],"by":[19,97,115],"a":[21,79,117],"ratio/binary":[22],"mask":[23],"that":[24],"is":[25],"applied":[26],"to":[27,34,64,124,152],"mixture":[29],"in":[30,43,50,52,132],"time-frequency":[32],"domain":[33],"produce":[35],"clean":[37],"speech.":[38],"Despite":[39],"great":[41],"performance":[42,51,160],"single-channel":[45],"setting,":[46],"these":[47,60,94],"frameworks":[48],"lag":[49],"multichannel":[54,88],"setting":[55],"as":[56,78],"majority":[58],"of":[59,103,108,146,161],"a)":[62,96],"fail":[63],"exploit":[65],"available":[67],"spatial":[68],"information":[69],"fully,":[70],"and":[71,111],"b)":[72,114],"still":[73],"treat":[74],"architecture":[77,123],"black":[80],"box":[81],"which":[82,133],"may":[83],"not":[84],"be":[85],"well-suited":[86],"audio":[89],"processing.":[90],"This":[91],"paper":[92],"addresses":[93],"drawbacks,":[95],"utilizing":[98],"complex":[99],"ratio":[100],"masking":[101,104],"instead":[102],"on":[105,140,168],"magnitude":[107],"spectrogram,":[110],"more":[112],"importantly,":[113],"introducing":[116],"channel-attention":[118,137],"mechanism":[119],"inside":[120],"mimic":[125],"beamforming.":[126,155],"We":[127,156],"propose":[128],"Channel-Attention":[129],"Dense":[130],"U-Net,":[131],"we":[134],"apply":[135],"unit":[138],"recursively":[139],"feature":[141],"maps":[142],"at":[143],"every":[144],"layer":[145],"network,":[148],"enabling":[149],"network":[151,163],"non-linear":[154],"demonstrate":[157],"superior":[159],"against":[164],"approaches":[167],"CHiME-3":[170],"dataset.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
