{"id":"https://openalex.org/W4321770347","doi":"https://doi.org/10.1109/lsp.2023.3248952","title":"So-DAS: A Two-Step Soft-Direction-Aware Speech Separation Framework","display_name":"So-DAS: A Two-Step Soft-Direction-Aware Speech Separation Framework","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4321770347","doi":"https://doi.org/10.1109/lsp.2023.3248952"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2023.3248952","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2023.3248952","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102009335","display_name":"Yi Yang","orcid":"https://orcid.org/0000-0003-4240-863X"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yi Yang","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Science, Beijing, China","School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Science, Beijing, China","institution_ids":["https://openalex.org/I4210099069"]},{"raw_affiliation_string":"School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101921929","display_name":"Qi Hu","orcid":"https://orcid.org/0000-0001-8832-4728"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Hu","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Science, Beijing, China","School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Science, Beijing, China","institution_ids":["https://openalex.org/I4210099069"]},{"raw_affiliation_string":"School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101651591","display_name":"Qingwei Zhao","orcid":"https://orcid.org/0000-0001-9272-2614"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingwei Zhao","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Science, Beijing, China","School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Science, Beijing, China","institution_ids":["https://openalex.org/I4210099069"]},{"raw_affiliation_string":"School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036446253","display_name":"Pengyuan Zhang","orcid":"https://orcid.org/0000-0001-6838-5160"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengyuan Zhang","raw_affiliation_strings":["Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Science, Beijing, China","School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Science, Beijing, China","institution_ids":["https://openalex.org/I4210099069"]},{"raw_affiliation_string":"School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102009335"],"corresponding_institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.7924,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.68747675,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"30","issue":null,"first_page":"344","last_page":"348"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7025832533836365},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.6906658411026001},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6722642183303833},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.6166603565216064},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5736361742019653},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.49235761165618896},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.471076637506485},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.42840877175331116},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42803704738616943},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35334497690200806},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3276559114456177},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18302181363105774},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.14996838569641113},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07846513390541077}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7025832533836365},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.6906658411026001},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6722642183303833},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.6166603565216064},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5736361742019653},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.49235761165618896},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.471076637506485},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.42840877175331116},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42803704738616943},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35334497690200806},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3276559114456177},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18302181363105774},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.14996838569641113},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07846513390541077},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2023.3248952","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2023.3248952","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.5400000214576721,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G6366494449","display_name":null,"funder_award_id":"11774380","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1552314771","https://openalex.org/W2060108923","https://openalex.org/W2067295501","https://openalex.org/W2068144845","https://openalex.org/W2117678320","https://openalex.org/W2127851351","https://openalex.org/W2221409856","https://openalex.org/W2291877678","https://openalex.org/W2734774145","https://openalex.org/W2746904187","https://openalex.org/W2888954542","https://openalex.org/W2909607850","https://openalex.org/W2917254586","https://openalex.org/W2939360348","https://openalex.org/W2940285530","https://openalex.org/W2952218014","https://openalex.org/W2962780374","https://openalex.org/W2962866211","https://openalex.org/W2964058413","https://openalex.org/W2972802573","https://openalex.org/W2973179291","https://openalex.org/W2973231102","https://openalex.org/W3015199127","https://openalex.org/W3015509183","https://openalex.org/W3016232124","https://openalex.org/W3043433769","https://openalex.org/W3097906045","https://openalex.org/W3099330747","https://openalex.org/W3132830522","https://openalex.org/W3140783160","https://openalex.org/W3157899172","https://openalex.org/W3158652463","https://openalex.org/W3158779859","https://openalex.org/W3159058526","https://openalex.org/W3160140981","https://openalex.org/W3163652268","https://openalex.org/W3163842642","https://openalex.org/W3170478154","https://openalex.org/W3171278394","https://openalex.org/W4233392025","https://openalex.org/W4296068764","https://openalex.org/W4297841846","https://openalex.org/W4298211556","https://openalex.org/W4403759581","https://openalex.org/W6702482642","https://openalex.org/W6839903191"],"related_works":["https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W2562096895","https://openalex.org/W2333799855","https://openalex.org/W3177678247","https://openalex.org/W1999617572","https://openalex.org/W2351687372","https://openalex.org/W2383414243","https://openalex.org/W2464407842","https://openalex.org/W2077498359"],"abstract_inverted_index":{"Most":[0],"existing":[1],"direction-aware":[2],"speech":[3,33,49],"separation":[4,34,50,75,87,119],"systems":[5],"lead":[6],"to":[7,19,72,117,134],"performance":[8],"degradation":[9],"when":[10,147],"the":[11,20,53,74,78,90,95,101,104,118,125,135,148,168],"angle":[12,149],"difference":[13,150],"between":[14,103,151],"speakers":[15,152],"is":[16,107,153],"small":[17],"due":[18],"low":[21],"spatial":[22,70,111],"discrimination.":[23],"To":[24],"address":[25],"this":[26,99],"issue,":[27],"we":[28],"propose":[29],"a":[30,40,48,160],"two-step":[31],"soft-direction-aware":[32],"(So-DAS)":[35],"framework,":[36],"which":[37],"consists":[38],"of":[39,42,165],"direction":[41],"arrival":[43],"(DOA)":[44],"estimation":[45],"module":[46],"and":[47,59,83,89,109],"module.":[51,76],"First,":[52],"two":[54,79,105],"modules":[55,80,106],"are":[56,67,81,92,114],"individually":[57],"optimized,":[58],"directional":[60],"features":[61],"(DFs)":[62],"derived":[63],"from":[64],"ground-truth":[65],"DOAs":[66],"utilized":[68],"as":[69],"information":[71],"facilitate":[73],"Next,":[77],"cascaded":[82],"optimized":[84],"with":[85],"only":[86],"loss,":[88],"DFs":[91,137],"generated":[93],"using":[94],"estimator":[96],"outputs.":[97],"By":[98],"means,":[100],"consistency":[102],"strengthened,":[108],"thus":[110],"cues":[112],"that":[113,132],"more":[115],"beneficial":[116],"task":[120],"can":[121],"be":[122],"exploited":[123],"by":[124,139],"network":[126],"itself.":[127],"The":[128],"experimental":[129],"results":[130],"show":[131],"compared":[133],"baselines,":[136],"extracted":[138],"our":[140,157],"proposed":[141],"method":[142],"provides":[143],"clearer":[144],"superiority,":[145],"especially":[146],"small.":[154],"In":[155],"addition,":[156],"approach":[158],"yields":[159],"state-of-the-art":[161],"word":[162],"error":[163],"rate":[164],"3.4%":[166],"on":[167],"real-recorded":[169],"utterance-wise":[170],"LibriCSS":[171],"dataset.":[172]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
