{"id":"https://openalex.org/W3206809722","doi":"https://doi.org/10.1109/icassp43922.2022.9746273","title":"Dual-Branch Attention-In-Attention Transformer for Single-Channel Speech Enhancement","display_name":"Dual-Branch Attention-In-Attention Transformer for Single-Channel Speech Enhancement","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3206809722","doi":"https://doi.org/10.1109/icassp43922.2022.9746273","mag":"3206809722"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746273","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746273","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025635655","display_name":"Guochen Yu","orcid":"https://orcid.org/0000-0002-7179-1044"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4391768176","display_name":"State Key Laboratory of Media Convergence and Communication","ror":"https://ror.org/0595ys057","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391768176","https://openalex.org/I75689368"]},{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guochen Yu","raw_affiliation_strings":["Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China","State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","Chinese Academy of Sciences, Institute of Acoustics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China","institution_ids":["https://openalex.org/I75689368","https://openalex.org/I4391768176"]},{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Acoustics, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053757616","display_name":"Andong Li","orcid":"https://orcid.org/0000-0003-4094-8448"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Andong Li","raw_affiliation_strings":["Institute of Acoustics,Chinese Academy of Sciences,Beijing,China","Chinese Academy of Sciences, Institute of Acoustics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Acoustics, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070266277","display_name":"Chengshi Zheng","orcid":"https://orcid.org/0000-0001-5656-994X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengshi Zheng","raw_affiliation_strings":["Institute of Acoustics,Chinese Academy of Sciences,Beijing,China","Chinese Academy of Sciences, Institute of Acoustics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Acoustics, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016681207","display_name":"Yinuo Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yinuo Guo","raw_affiliation_strings":["Bytedance,Beijing,China","Bytedance, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Bytedance,Beijing,China","institution_ids":[]},{"raw_affiliation_string":"Bytedance, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100741276","display_name":"Yutian Wang","orcid":"https://orcid.org/0009-0008-8451-600X"},"institutions":[{"id":"https://openalex.org/I4391768176","display_name":"State Key Laboratory of Media Convergence and Communication","ror":"https://ror.org/0595ys057","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391768176","https://openalex.org/I75689368"]},{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yutian Wang","raw_affiliation_strings":["Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China","State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China","institution_ids":["https://openalex.org/I75689368","https://openalex.org/I4391768176"]},{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100460841","display_name":"Hui Wang","orcid":"https://orcid.org/0000-0002-8476-888X"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]},{"id":"https://openalex.org/I4391768176","display_name":"State Key Laboratory of Media Convergence and Communication","ror":"https://ror.org/0595ys057","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391768176","https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Wang","raw_affiliation_strings":["Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China","State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China","institution_ids":["https://openalex.org/I75689368","https://openalex.org/I4391768176"]},{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025635655"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210099069","https://openalex.org/I4391768176","https://openalex.org/I75689368"],"apc_list":null,"apc_paid":null,"fwci":12.1539,"has_fulltext":false,"cited_by_count":102,"citation_normalized_percentile":{"value":0.99150142,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"7847","last_page":"7851"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7373716831207275},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.716023325920105},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5684275031089783},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.49316662549972534},{"id":"https://openalex.org/keywords/pesq","display_name":"PESQ","score":0.47769787907600403},{"id":"https://openalex.org/keywords/attention-network","display_name":"Attention network","score":0.4453502595424652},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4178313910961151},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11053985357284546},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.0940137505531311}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7373716831207275},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.716023325920105},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5684275031089783},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.49316662549972534},{"id":"https://openalex.org/C103734657","wikidata":"https://www.wikidata.org/wiki/Q2739975","display_name":"PESQ","level":4,"score":0.47769787907600403},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.4453502595424652},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4178313910961151},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11053985357284546},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0940137505531311},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746273","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746273","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7799999713897705,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1495679096","https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W1983108229","https://openalex.org/W2044893557","https://openalex.org/W2067295501","https://openalex.org/W2069681747","https://openalex.org/W2070126272","https://openalex.org/W2094721231","https://openalex.org/W2144404214","https://openalex.org/W2476548250","https://openalex.org/W2603567530","https://openalex.org/W2802304149","https://openalex.org/W2906042495","https://openalex.org/W2937484199","https://openalex.org/W2943895317","https://openalex.org/W2949558265","https://openalex.org/W2949756029","https://openalex.org/W2962866211","https://openalex.org/W2963341071","https://openalex.org/W2991361823","https://openalex.org/W2998161426","https://openalex.org/W2998445964","https://openalex.org/W3015219411","https://openalex.org/W3016129867","https://openalex.org/W3036367741","https://openalex.org/W3045728954","https://openalex.org/W3045904949","https://openalex.org/W3046669506","https://openalex.org/W3096008106","https://openalex.org/W3096408984","https://openalex.org/W3096893582","https://openalex.org/W3097945073","https://openalex.org/W3120336970","https://openalex.org/W3141443761","https://openalex.org/W3161950572","https://openalex.org/W3162493033","https://openalex.org/W3173999841","https://openalex.org/W3186301694","https://openalex.org/W3190223472","https://openalex.org/W3197260772","https://openalex.org/W3197729725","https://openalex.org/W3197912330","https://openalex.org/W3201698955","https://openalex.org/W3213188934","https://openalex.org/W4253928870","https://openalex.org/W6631190155","https://openalex.org/W6667372801","https://openalex.org/W6720887540","https://openalex.org/W6735429107","https://openalex.org/W6757632829","https://openalex.org/W6762114000","https://openalex.org/W6796856108","https://openalex.org/W6798972960"],"related_works":["https://openalex.org/W3016109656","https://openalex.org/W2058482658","https://openalex.org/W1973895194","https://openalex.org/W3135613579","https://openalex.org/W4388016426","https://openalex.org/W1980687383","https://openalex.org/W4386746628","https://openalex.org/W2546593254","https://openalex.org/W2166831097","https://openalex.org/W3209446892"],"abstract_inverted_index":{"Curriculum":[0],"learning":[1],"begins":[2],"to":[3,21,36,58,74,96,128],"thrive":[4],"in":[5,46],"the":[6,12,44,61,77,98,110],"speech":[7],"enhancement":[8],"area,":[9],"which":[10],"decouples":[11],"original":[13],"spectrum":[14,45],"estimation":[15],"task":[16],"into":[17],"multiple":[18],"easier":[19],"sub-tasks":[20],"achieve":[22],"better":[23],"performance.":[24],"Motivated":[25],"by":[26],"that,":[27],"we":[28,89],"propose":[29,90],"a":[30,49,52,67,91,166],"dual-branch":[31],"attention-in-attention":[32,93,112],"transformer":[33,113,119],"dubbed":[34],"DB-AIAT":[35,149],"handle":[37],"both":[38],"coarse-":[39],"and":[40,65,81,101,121,133,158],"fine-grained":[41],"regions":[42],"of":[43,115],"parallel.":[47],"From":[48],"complementary":[50],"perspective,":[51],"magnitude":[53,63],"masking":[54],"branch":[55,70],"is":[56,71],"proposed":[57,111],"coarsely":[59],"estimate":[60],"overall":[62],"spectrum,":[64],"simultaneously":[66],"complex":[68],"refining":[69],"elaborately":[72],"designed":[73],"compensate":[75],"for":[76,105],"missing":[78],"spectral":[79],"details":[80],"implicitly":[82],"derive":[83],"phase":[84],"information.":[85,139],"Within":[86],"each":[87],"branch,":[88],"novel":[92],"transformer-based":[94],"module":[95],"replace":[97],"conventional":[99],"RNNs":[100],"temporal":[102,106],"convolutional":[103],"networks":[104],"sequence":[107],"modeling.":[108],"Specifically,":[109],"consists":[114],"adaptive":[116,123],"temporal-frequency":[117,131],"attention":[118,125],"blocks":[120],"an":[122],"hierarchical":[124,137],"module,":[126],"aiming":[127],"capture":[129],"long-term":[130],"dependencies":[132],"further":[134],"aggregate":[135],"global":[136],"contextual":[138],"Experimental":[140],"results":[141],"on":[142],"Voice":[143],"Bank":[144],"+":[145],"DEMAND":[146],"demonstrate":[147],"that":[148],"yields":[150],"state-of-the-art":[151],"performance":[152],"(e.g.,":[153],"3.31":[154],"PESQ,":[155],"95.6%":[156],"STOI":[157],"10.79dB":[159],"SSNR)":[160],"over":[161],"previous":[162],"advanced":[163],"systems":[164],"with":[165],"relatively":[167],"small":[168],"model":[169],"size":[170],"(2.81M).":[171]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":36},{"year":2024,"cited_by_count":29},{"year":2023,"cited_by_count":26},{"year":2022,"cited_by_count":9}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
