{"id":"https://openalex.org/W4221145205","doi":"https://doi.org/10.1109/taslp.2022.3195112","title":"DBT-Net: Dual-Branch Federative Magnitude and Phase Estimation With Attention-in-Attention Transformer for Monaural Speech Enhancement","display_name":"DBT-Net: Dual-Branch Federative Magnitude and Phase Estimation With Attention-in-Attention Transformer for Monaural Speech Enhancement","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4221145205","doi":"https://doi.org/10.1109/taslp.2022.3195112"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2022.3195112","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3195112","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025635655","display_name":"Guochen Yu","orcid":"https://orcid.org/0000-0002-7179-1044"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guochen Yu","raw_affiliation_strings":["State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7179-1044","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053757616","display_name":"Andong Li","orcid":"https://orcid.org/0000-0003-4094-8448"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Andong Li","raw_affiliation_strings":["Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100460917","display_name":"Hui Wang","orcid":"https://orcid.org/0000-0003-2633-6015"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Wang","raw_affiliation_strings":["State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004298736","display_name":"Yutian Wang","orcid":"https://orcid.org/0000-0001-8383-8762"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yutian Wang","raw_affiliation_strings":["State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8383-8762","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087876341","display_name":"Yuxuan Ke","orcid":"https://orcid.org/0000-0003-3486-0298"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxuan Ke","raw_affiliation_strings":["Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070266277","display_name":"Chengshi Zheng","orcid":"https://orcid.org/0000-0001-5656-994X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengshi Zheng","raw_affiliation_strings":["Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5656-994X","affiliations":[{"raw_affiliation_string":"Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025635655"],"corresponding_institution_ids":["https://openalex.org/I75689368"],"apc_list":null,"apc_paid":null,"fwci":6.9933,"has_fulltext":false,"cited_by_count":49,"citation_normalized_percentile":{"value":0.97856905,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"30","issue":null,"first_page":"2629","last_page":"2644"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7558482885360718},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.584867537021637},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5039767622947693},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.47027045488357544},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46788614988327026},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4083523452281952},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32723650336265564},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3267263174057007},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09773442149162292}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7558482885360718},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.584867537021637},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5039767622947693},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.47027045488357544},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46788614988327026},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4083523452281952},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32723650336265564},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3267263174057007},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09773442149162292},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2022.3195112","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3195112","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W165956390","https://openalex.org/W1552314771","https://openalex.org/W1563795667","https://openalex.org/W1974387177","https://openalex.org/W1983108229","https://openalex.org/W1988550194","https://openalex.org/W2024490156","https://openalex.org/W2044893557","https://openalex.org/W2048142341","https://openalex.org/W2067295501","https://openalex.org/W2069681747","https://openalex.org/W2070126272","https://openalex.org/W2094721231","https://openalex.org/W2144404214","https://openalex.org/W2291877678","https://openalex.org/W2364134690","https://openalex.org/W2476548250","https://openalex.org/W2516001803","https://openalex.org/W2603567530","https://openalex.org/W2802304149","https://openalex.org/W2889442120","https://openalex.org/W2897371647","https://openalex.org/W2929481141","https://openalex.org/W2937484199","https://openalex.org/W2940177920","https://openalex.org/W2943554574","https://openalex.org/W2949756029","https://openalex.org/W2952218014","https://openalex.org/W2952979007","https://openalex.org/W2962866211","https://openalex.org/W2963189033","https://openalex.org/W2963341071","https://openalex.org/W2964089206","https://openalex.org/W2972948005","https://openalex.org/W2991361823","https://openalex.org/W2998161426","https://openalex.org/W2998445964","https://openalex.org/W3015199127","https://openalex.org/W3015219411","https://openalex.org/W3015312544","https://openalex.org/W3016129867","https://openalex.org/W3017350693","https://openalex.org/W3032514799","https://openalex.org/W3034763882","https://openalex.org/W3096008106","https://openalex.org/W3096408984","https://openalex.org/W3096893582","https://openalex.org/W3097906045","https://openalex.org/W3097945073","https://openalex.org/W3099330747","https://openalex.org/W3120336970","https://openalex.org/W3134695619","https://openalex.org/W3161480375","https://openalex.org/W3161950572","https://openalex.org/W3162493033","https://openalex.org/W3197260772","https://openalex.org/W3197729725","https://openalex.org/W3197912330","https://openalex.org/W3200407957","https://openalex.org/W3201698955","https://openalex.org/W3206809722","https://openalex.org/W3213188934","https://openalex.org/W4224916670","https://openalex.org/W4224934178","https://openalex.org/W4253928870","https://openalex.org/W4365420402","https://openalex.org/W6631190155","https://openalex.org/W6633802082","https://openalex.org/W6696085341","https://openalex.org/W6731370813","https://openalex.org/W6739901393","https://openalex.org/W6757632829","https://openalex.org/W6762114000","https://openalex.org/W6798972960"],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W4390569940","https://openalex.org/W4361193272","https://openalex.org/W4310278675","https://openalex.org/W4388422664","https://openalex.org/W2806259446","https://openalex.org/W1986582023","https://openalex.org/W2883749686","https://openalex.org/W2966829450","https://openalex.org/W4315864862"],"abstract_inverted_index":{"The":[0],"decoupling-style":[1],"concept":[2],"begins":[3],"to":[4,82,101,130,139,199],"ignite":[5],"in":[6,33,70,88,113,237],"the":[7,13,24,61,67,73,76,89,93,103,110,114,120,141,146,150,156,214,225],"speech":[8,56,240],"enhancement":[9],"area,":[10],"which":[11],"decouples":[12],"original":[14],"complex":[15,29,94],"spectrum":[16,30,69,95],"estimation":[17,50,78],"task":[18],"into":[19],"multiple":[20],"easier":[21,37],"sub-tasks":[22],"(<i>i.e.</i>,":[23],"magnitude-only":[25],"recovery":[26],"and":[27,36,48,63,107,144,159,192,204,218,233,242],"residual":[28],"estimation),":[31],"resulting":[32],"better":[34,177],"performance":[35,236],"interpretability.":[38],"In":[39],"this":[40],"paper,":[41],"we":[42,166],"propose":[43],"a":[44,168],"dual-branch":[45],"federative":[46],"magnitude":[47,77,90],"phase":[49,111],"framework,":[51],"dubbed":[52],"DBT-Net,":[53],"for":[54,163,176],"monaural":[55],"enhancement,":[57],"aiming":[58,198],"at":[59],"recovering":[60],"coarse-":[62],"fine-grained":[64],"regions":[65],"of":[66,149,154,185,239],"overall":[68],"parallel.":[71],"From":[72],"complementary":[74],"perspective,":[75],"branch":[79,97,175],"is":[80,98,183],"designed":[81,100],"filter":[83],"out":[84],"dominant":[85],"noise":[86],"components":[87,148],"domain,":[91],"while":[92],"purification":[96],"elaborately":[99],"inpaint":[102],"missing":[104,147],"spectral":[105,116],"details":[106],"implicitly":[108],"estimate":[109],"information":[112,121],"complex-valued":[115],"domain.":[117],"To":[118],"facilitate":[119],"flow":[122],"between":[123],"each":[124,174],"branch,":[125,136],"interaction":[126],"modules":[127,191],"are":[128],"introduced":[129],"leverage":[131],"features":[132],"learned":[133],"from":[134],"one":[135],"so":[137],"as":[138],"suppress":[140],"undesired":[142],"parts":[143],"recover":[145],"other":[151],"branch.":[152],"Instead":[153],"adopting":[155],"conventional":[157],"RNNs":[158],"temporal":[160],"convolutional":[161],"networks":[162],"sequence":[164],"modeling,":[165],"employ":[167],"novel":[169],"attention-in-attention":[170],"transformer-based":[171,190],"network":[172],"within":[173],"feature":[178],"learning.":[179],"More":[180],"specially,":[181],"it":[182],"composed":[184],"several":[186],"adaptive":[187,194],"spectro-temporal":[188],"attention":[189,196],"an":[193],"hierarchical":[195,208],"module,":[197],"capture":[200],"long-term":[201],"time-frequency":[202],"dependencies":[203],"further":[205],"aggregate":[206],"intermediate":[207],"contextual":[209],"information.":[210],"Comprehensive":[211],"evaluations":[212],"on":[213],"WSJ0-SI84":[215],"+":[216,220],"DNS-Challenge":[217],"VoiceBank":[219],"DEMAND":[221],"dataset":[222],"demonstrate":[223],"that":[224],"proposed":[226],"approach":[227],"consistently":[228],"outperforms":[229],"previous":[230],"advanced":[231],"systems":[232],"yields":[234],"state-of-the-art":[235],"terms":[238],"quality":[241],"intelligibility.":[243]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":8}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
