{"id":"https://openalex.org/W4372263659","doi":"https://doi.org/10.1109/icassp49357.2023.10097252","title":"Personalized Speech Enhancement Combining Band-Split RNN and Speaker Attentive Module","display_name":"Personalized Speech Enhancement Combining Band-Split RNN and Speaker Attentive Module","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372263659","doi":"https://doi.org/10.1109/icassp49357.2023.10097252"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10097252","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10097252","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009836479","display_name":"Xiaohuai Le","orcid":"https://orcid.org/0000-0002-6419-1825"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaohuai Le","raw_affiliation_strings":["Nanjing University,Key Laboratory of Modern Acoustics,Nanjing,China,210093","RTC Lab, ByteDance, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,Key Laboratory of Modern Acoustics,Nanjing,China,210093","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I881766915"]},{"raw_affiliation_string":"RTC Lab, ByteDance, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100379262","display_name":"Li Chen","orcid":"https://orcid.org/0000-0002-5842-838X"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Chen","raw_affiliation_strings":["ByteDance,RTC Lab,China","RTC Lab, ByteDance, China"],"affiliations":[{"raw_affiliation_string":"ByteDance,RTC Lab,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"RTC Lab, ByteDance, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009406286","display_name":"Chao He","orcid":"https://orcid.org/0000-0001-7869-7627"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao He","raw_affiliation_strings":["ByteDance,RTC Lab,China","RTC Lab, ByteDance, China"],"affiliations":[{"raw_affiliation_string":"ByteDance,RTC Lab,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"RTC Lab, ByteDance, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101536720","display_name":"Yiqing Guo","orcid":"https://orcid.org/0000-0002-4184-0802"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiqing Guo","raw_affiliation_strings":["ByteDance,RTC Lab,China","RTC Lab, ByteDance, China"],"affiliations":[{"raw_affiliation_string":"ByteDance,RTC Lab,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"RTC Lab, ByteDance, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100420548","display_name":"Cheng Chen","orcid":"https://orcid.org/0000-0002-4203-2145"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Chen","raw_affiliation_strings":["ByteDance,RTC Lab,China","RTC Lab, ByteDance, China"],"affiliations":[{"raw_affiliation_string":"ByteDance,RTC Lab,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"RTC Lab, ByteDance, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043498697","display_name":"Xianjun Xia","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianjun Xia","raw_affiliation_strings":["ByteDance,RTC Lab,China","RTC Lab, ByteDance, China"],"affiliations":[{"raw_affiliation_string":"ByteDance,RTC Lab,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"RTC Lab, ByteDance, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059632088","display_name":"Jing L\u00fc","orcid":"https://orcid.org/0000-0001-9683-3768"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Lu","raw_affiliation_strings":["Nanjing University,Key Laboratory of Modern Acoustics,Nanjing,China,210093","NJU-Horizon Intelligent Audio Lab, Horizon Robotics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University,Key Laboratory of Modern Acoustics,Nanjing,China,210093","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I881766915"]},{"raw_affiliation_string":"NJU-Horizon Intelligent Audio Lab, Horizon Robotics, Beijing, China","institution_ids":["https://openalex.org/I4401726824"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5009836479"],"corresponding_institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":1.2199,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.78631783,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/concatenation","display_name":"Concatenation (mathematics)","score":0.7805064916610718},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7640594840049744},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7352970838546753},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6520527601242065},{"id":"https://openalex.org/keywords/affine-transformation","display_name":"Affine transformation","score":0.5873124599456787},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5465764999389648},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.5413837432861328},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5310554504394531},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5227839350700378},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5024924278259277},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.49718502163887024},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4740797281265259},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.468789666891098},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4647473692893982},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32791438698768616},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09499937295913696}],"concepts":[{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.7805064916610718},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7640594840049744},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7352970838546753},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6520527601242065},{"id":"https://openalex.org/C92757383","wikidata":"https://www.wikidata.org/wiki/Q382497","display_name":"Affine transformation","level":2,"score":0.5873124599456787},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5465764999389648},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.5413837432861328},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5310554504394531},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5227839350700378},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5024924278259277},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.49718502163887024},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4740797281265259},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.468789666891098},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4647473692893982},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32791438698768616},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09499937295913696},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10097252","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10097252","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5199999809265137,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2938917877","https://openalex.org/W3160129476","https://openalex.org/W3163132306","https://openalex.org/W4223655162","https://openalex.org/W4224919629","https://openalex.org/W4385822305","https://openalex.org/W6847259909"],"related_works":["https://openalex.org/W2588431733","https://openalex.org/W2146904109","https://openalex.org/W1935076284","https://openalex.org/W2567495185","https://openalex.org/W1890221585","https://openalex.org/W2144122115","https://openalex.org/W2154865245","https://openalex.org/W4312095954","https://openalex.org/W29845478","https://openalex.org/W4372263659"],"abstract_inverted_index":{"Target":[0],"speaker":[1,22,41,51],"information":[2],"can":[3],"be":[4],"utilized":[5],"in":[6,68],"speech":[7,25],"enhancement":[8,26],"(SE)":[9],"models":[10,27],"to":[11,44,60],"more":[12],"effectively":[13],"extract":[14],"the":[15,21,46,50,54,62,69,75,98],"desired":[16],"speech.":[17],"Previous":[18],"works":[19],"introduce":[20],"embedding":[23,52],"into":[24],"by":[28],"means":[29],"of":[30,95,102],"concatenation":[31],"or":[32],"affine":[33],"transformation.":[34],"In":[35],"this":[36,66],"paper,":[37],"we":[38,73],"propose":[39],"a":[40,92],"attentive":[42],"module":[43,67],"calculate":[45],"attention":[47],"scores":[48],"between":[49],"and":[53,104],"intermediate":[55],"features,":[56],"which":[57],"are":[58],"used":[59],"rescale":[61],"features.":[63],"By":[64],"merging":[65],"state-of-the-art":[70],"SE":[71,77],"model,":[72],"construct":[74],"personalized":[76],"model":[78],"for":[79],"ICASSP":[80],"Signal":[81],"Processing":[82],"Grand":[83],"Challenge:":[84],"DNS":[85],"Challenge":[86],"5":[87],"(2023).":[88],"Our":[89],"system":[90],"achieves":[91],"final":[93],"score":[94],"0.529":[96],"on":[97,106],"blind":[99],"test":[100],"set":[101],"track1":[103],"0.549":[105],"track2.":[107]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
