{"id":"https://openalex.org/W4319862644","doi":"https://doi.org/10.1109/slt54892.2023.10023174","title":"TEA-PSE 2.0: Sub-Band Network for Real-Time Personalized Speech Enhancement","display_name":"TEA-PSE 2.0: Sub-Band Network for Real-Time Personalized Speech Enhancement","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4319862644","doi":"https://doi.org/10.1109/slt54892.2023.10023174"},"language":"en","primary_location":{"id":"doi:10.1109/slt54892.2023.10023174","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10023174","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015448985","display_name":"Yukai Ju","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]},{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yukai Ju","raw_affiliation_strings":["Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi&#x0027;an,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi&#x0027;an,China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100630316","display_name":"Shimin Zhang","orcid":"https://orcid.org/0009-0004-4344-5218"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shimin Zhang","raw_affiliation_strings":["Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi&#x0027;an,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi&#x0027;an,China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035226758","display_name":"Wei Rao","orcid":"https://orcid.org/0000-0002-7237-0874"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Rao","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084128157","display_name":"Yannan Wang","orcid":"https://orcid.org/0000-0001-7248-4954"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yannan Wang","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057697869","display_name":"Tao Yu","orcid":"https://orcid.org/0000-0002-0143-261X"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Yu","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100668966","display_name":"Lei Xie","orcid":"https://orcid.org/0000-0001-8234-0823"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Xie","raw_affiliation_strings":["Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi&#x0027;an,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,Audio, Speech and Language Processing Group (ASLP@NPU),Xi&#x0027;an,China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078353046","display_name":"Shidong Shang","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shidong Shang","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8789,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.90625,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"472","last_page":"479"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7538977861404419},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7390663623809814},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6433977484703064},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5864115953445435},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.48696479201316833},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.48140931129455566},{"id":"https://openalex.org/keywords/finite-impulse-response","display_name":"Finite impulse response","score":0.4722663164138794},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.44492509961128235},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4109887480735779},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.26981085538864136},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.2375788390636444},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1544169783592224},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.11049520969390869}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7538977861404419},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7390663623809814},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6433977484703064},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5864115953445435},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.48696479201316833},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.48140931129455566},{"id":"https://openalex.org/C198386975","wikidata":"https://www.wikidata.org/wiki/Q117785","display_name":"Finite impulse response","level":2,"score":0.4722663164138794},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.44492509961128235},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4109887480735779},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26981085538864136},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.2375788390636444},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1544169783592224},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.11049520969390869}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt54892.2023.10023174","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10023174","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.699999988079071,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2117678320","https://openalex.org/W2141998673","https://openalex.org/W2172065531","https://openalex.org/W2516001803","https://openalex.org/W2593116425","https://openalex.org/W2951130829","https://openalex.org/W2952218014","https://openalex.org/W2963750251","https://openalex.org/W2963868408","https://openalex.org/W2964058413","https://openalex.org/W2973062255","https://openalex.org/W3016361963","https://openalex.org/W3024869864","https://openalex.org/W3092864146","https://openalex.org/W3094893721","https://openalex.org/W3096090308","https://openalex.org/W3097538987","https://openalex.org/W3097653961","https://openalex.org/W3099330747","https://openalex.org/W3103434036","https://openalex.org/W3144035034","https://openalex.org/W3160973314","https://openalex.org/W3162493033","https://openalex.org/W3194338569","https://openalex.org/W3195288392","https://openalex.org/W3197260772","https://openalex.org/W3198543387","https://openalex.org/W3198680319","https://openalex.org/W4221155601","https://openalex.org/W4221157843","https://openalex.org/W4224917453","https://openalex.org/W4224919629","https://openalex.org/W4224931713","https://openalex.org/W4224932531","https://openalex.org/W4225302959","https://openalex.org/W4226370149","https://openalex.org/W4232282348","https://openalex.org/W4281712850","https://openalex.org/W4312570973","https://openalex.org/W6631190155","https://openalex.org/W6731370813","https://openalex.org/W6809989929","https://openalex.org/W6810708021","https://openalex.org/W6810856829"],"related_works":["https://openalex.org/W2120771489","https://openalex.org/W2294333436","https://openalex.org/W3110551121","https://openalex.org/W2653598178","https://openalex.org/W2373767407","https://openalex.org/W2131486661","https://openalex.org/W2089240210","https://openalex.org/W642007152","https://openalex.org/W2072884270","https://openalex.org/W4200596008"],"abstract_inverted_index":{"Personalized":[0],"speech":[1,16,20,29,129],"enhancement":[2,30,130],"(PSE)":[3],"utilizes":[4],"additional":[5],"cues":[6],"like":[7],"speaker":[8],"embeddings":[9],"to":[10,50,57,78,90,108],"remove":[11],"background":[12],"noise":[13,40],"and":[14,17,75,113,132],"interfering":[15],"extract":[18],"the":[19,26,36,91,95,110,119,138,160],"from":[21],"target":[22],"speaker.":[23],"Previous":[24],"work,":[25],"Tencent-Ethereal-Audio-Lab":[27],"personalized":[28,150],"(TEA-PSE)":[31],"system,":[32],"ranked":[33],"1st":[34],"in":[35,118,127],"ICASSP":[37],"2022":[38],"deep":[39],"suppression":[41],"(DNS2022)":[42],"challenge.":[43],"In":[44],"this":[45],"paper,":[46],"we":[47,68,103],"expand":[48],"TEA-PSE":[49,55,122,126,145],"its":[51],"sub-band":[52],"version":[53],"-":[54],"2.0,":[56],"reduce":[58,79],"computational":[59,80],"complexity":[60],"as":[61,63],"well":[62],"further":[64],"improve":[65],"performance.":[66],"Specifically,":[67],"adopt":[69],"finite":[70],"impulse":[71],"response":[72],"filter":[73],"banks":[74],"spectrum":[76],"splitting":[77],"complexity.":[81,134],"We":[82],"introduce":[83],"a":[84],"time":[85],"frequency":[86],"convolution":[87,100],"module":[88],"(TFCM)":[89],"system":[92],"for":[93],"increasing":[94],"receptive":[96],"field":[97],"with":[98,153,159],"small":[99],"kernels.":[101],"Besides,":[102],"explore":[104],"several":[105],"training":[106],"strategies":[107],"optimize":[109],"two-stage":[111],"network":[112],"investigate":[114],"various":[115],"loss":[116],"functions":[117],"PSE":[120],"task.":[121],"2.0":[123,146],"significantly":[124],"outperforms":[125],"both":[128],"performance":[131],"computation":[133],"Experimental":[135],"results":[136],"on":[137],"DNS2022":[139],"blind":[140],"test":[141],"set":[142],"show":[143],"that":[144],"brings":[147],"0.102":[148],"OVRL":[149],"DNSMOS":[151],"improvement":[152],"only":[154],"21.9%":[155],"multiply-accumulate":[156],"operations":[157],"compared":[158],"previous":[161],"TEA-PSE.":[162]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
