{"id":"https://openalex.org/W3161305977","doi":"https://doi.org/10.1109/icassp39728.2021.9413818","title":"Speaker and Direction Inferred Dual-Channel Speech Separation","display_name":"Speaker and Direction Inferred Dual-Channel Speech Separation","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3161305977","doi":"https://doi.org/10.1109/icassp39728.2021.9413818","mag":"3161305977"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050682332","display_name":"Chenxing Li","orcid":"https://orcid.org/0000-0002-3997-8212"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chenxing Li","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101746187","display_name":"Jiaming Xu","orcid":"https://orcid.org/0000-0001-7635-1059"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaming Xu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033351155","display_name":"Nima Mesgarani","orcid":"https://orcid.org/0000-0002-2987-759X"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nima Mesgarani","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108642431","display_name":"Bo Xu","orcid":"https://orcid.org/0000-0002-1111-1529"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Xu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050682332"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.7618,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.69959432,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"5779","last_page":"5783"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.793460488319397},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7235949039459229},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6316222548484802},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5974266529083252},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.5710811018943787},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.5566624999046326},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.5401998162269592},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5363389849662781},{"id":"https://openalex.org/keywords/anechoic-chamber","display_name":"Anechoic chamber","score":0.5266815423965454},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4413371980190277},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.352292537689209},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.15250951051712036},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10504955053329468},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.098274827003479}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.793460488319397},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7235949039459229},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6316222548484802},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5974266529083252},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.5710811018943787},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.5566624999046326},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.5401998162269592},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5363389849662781},{"id":"https://openalex.org/C149712133","wikidata":"https://www.wikidata.org/wiki/Q332774","display_name":"Anechoic chamber","level":2,"score":0.5266815423965454},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4413371980190277},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.352292537689209},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.15250951051712036},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10504955053329468},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.098274827003479},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6299999952316284}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W2067584370","https://openalex.org/W2117678320","https://openalex.org/W2127851351","https://openalex.org/W2133564696","https://openalex.org/W2221409856","https://openalex.org/W2531409750","https://openalex.org/W2558649592","https://openalex.org/W2734774145","https://openalex.org/W2750446090","https://openalex.org/W2792764867","https://openalex.org/W2800664709","https://openalex.org/W2888954542","https://openalex.org/W2891405874","https://openalex.org/W2891833136","https://openalex.org/W2944972166","https://openalex.org/W2952218014","https://openalex.org/W2962715207","https://openalex.org/W2963248507","https://openalex.org/W2963317762","https://openalex.org/W2963403868","https://openalex.org/W2963620441","https://openalex.org/W2964238697","https://openalex.org/W2964308564","https://openalex.org/W2972460025","https://openalex.org/W2972583914","https://openalex.org/W2972767900","https://openalex.org/W2973054567","https://openalex.org/W2973062255","https://openalex.org/W2973179291","https://openalex.org/W2996820600","https://openalex.org/W3015199127","https://openalex.org/W3016361963","https://openalex.org/W3096730413","https://openalex.org/W3097653961","https://openalex.org/W3099330747","https://openalex.org/W3103434036","https://openalex.org/W4385245566","https://openalex.org/W6679434410","https://openalex.org/W6739901393","https://openalex.org/W6749825310","https://openalex.org/W6752554729","https://openalex.org/W6762182681","https://openalex.org/W6772027853"],"related_works":["https://openalex.org/W2289848901","https://openalex.org/W2001172292","https://openalex.org/W2169567031","https://openalex.org/W1849802015","https://openalex.org/W2354873283","https://openalex.org/W2023041720","https://openalex.org/W3097766872","https://openalex.org/W2353977479","https://openalex.org/W1539630658","https://openalex.org/W2077498359"],"abstract_inverted_index":{"Most":[0],"speech":[1,53,137],"separation":[2,54,138],"methods,":[3],"trying":[4],"to":[5,58,96,98],"separate":[6],"all":[7],"channel":[8],"sources":[9,126],"simultaneously,":[10],"are":[11,94],"still":[12],"far":[13],"from":[14,40,80],"having":[15],"enough":[16],"generalization":[17],"capabilities":[18],"for":[19],"real":[20],"scenarios":[21],"where":[22],"the":[23,60,70,81,84,91,110,119,122,128,145],"number":[24,124],"of":[25,83,112,121,125,130,153],"input":[26],"sounds":[27],"is":[28],"usually":[29],"uncertain":[30],"and":[31,46,50,77,115,127,142,147,158],"even":[32],"dynamic.":[33],"In":[34],"this":[35],"work,":[36],"we":[37],"employ":[38],"ideas":[39],"auditory":[41],"attention":[42],"with":[43,74,109,118],"two":[44],"ears":[45],"propose":[47],"a":[48,87],"speaker":[49,76],"direction":[51,78],"inferred":[52],"network":[55],"(dubbed":[56],"SDNet)":[57],"solve":[59],"cocktail":[61],"party":[62],"problem.":[63],"Specifically,":[64],"our":[65,148],"SDNet":[66],"first":[67],"parses":[68],"out":[69],"respective":[71],"perceptual":[72,92,107],"representations":[73,93,108],"their":[75],"characteristics":[79],"mixture":[82],"scene":[85],"in":[86],"sequential":[88],"manner.":[89],"Then,":[90],"utilized":[95],"attend":[97],"each":[99],"corresponding":[100],"speech.":[101],"Our":[102,164],"model":[103],"generates":[104],"more":[105],"precise":[106],"help":[111],"spatial":[113],"features":[114],"successfully":[116],"deals":[117],"problem":[120],"unknown":[123],"selection":[129],"outputs.":[131],"The":[132],"experiments":[133],"on":[134],"standard":[135],"fully-overlapped":[136],"benchmarks,":[139],"WSJ0-2mix,":[140],"WSJ0-3mix,":[141],"WSJ0-2&3mix,":[143],"show":[144],"effectiveness,":[146],"method":[149],"achieves":[150],"SDR":[151],"improvements":[152],"25.31":[154],"dB,":[155,157],"17.26":[156],"21.56":[159],"dB":[160],"under":[161],"anechoic":[162],"settings.":[163],"codes":[165],"will":[166],"be":[167],"released":[168],"at":[169],"https://github.com/aispeech-lab/SDNet.":[170]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
