{"id":"https://openalex.org/W3081729342","doi":"https://doi.org/10.1109/lsp.2020.3043977","title":"SAGRNN: Self-Attentive Gated RNN For Binaural Speaker Separation With Interaural Cue Preservation","display_name":"SAGRNN: Self-Attentive Gated RNN For Binaural Speaker Separation With Interaural Cue Preservation","publication_year":2020,"publication_date":"2020-12-11","ids":{"openalex":"https://openalex.org/W3081729342","doi":"https://doi.org/10.1109/lsp.2020.3043977","mag":"3081729342"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2020.3043977","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2020.3043977","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2009.01381","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ke Tan","orcid":"https://orcid.org/0000-0001-5073-8060"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ke Tan","raw_affiliation_strings":["Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Buye Xu","orcid":"https://orcid.org/0000-0002-3027-7567"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Buye Xu","raw_affiliation_strings":["Facebook Reality Labs, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Facebook Reality Labs, Redmond, WA, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Anurag Kumar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anurag Kumar","raw_affiliation_strings":["Facebook Reality Labs, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Facebook Reality Labs, Redmond, WA, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Eliya Nachmani","orcid":null},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]},{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Eliya Nachmani","raw_affiliation_strings":["Facebook AI Research, Tel Aviv, Israel","Tel-Aviv University, Tel Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research, Tel Aviv, Israel","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Tel-Aviv University, Tel Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"last","author":{"id":null,"display_name":"Yossi Adi","orcid":null},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]},{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Yossi Adi","raw_affiliation_strings":["Facebook AI Research, Tel Aviv, Israel","Tel-Aviv University, Tel Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research, Tel Aviv, Israel","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Tel-Aviv University, Tel Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":2.2857,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.89000851,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"28","issue":null,"first_page":"26","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9133999943733215,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9133999943733215,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.07000000029802322,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.0017000000225380063,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/binaural-recording","display_name":"Binaural recording","score":0.9506000280380249},{"id":"https://openalex.org/keywords/monaural","display_name":"Monaural","score":0.9381999969482422},{"id":"https://openalex.org/keywords/sound-localization","display_name":"Sound localization","score":0.5328999757766724},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.5029000043869019},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.48809999227523804},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.41830000281333923},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.40849998593330383},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.36469998955726624}],"concepts":[{"id":"https://openalex.org/C201247586","wikidata":"https://www.wikidata.org/wiki/Q5612967","display_name":"Binaural recording","level":2,"score":0.9506000280380249},{"id":"https://openalex.org/C102894143","wikidata":"https://www.wikidata.org/wiki/Q1323979","display_name":"Monaural","level":2,"score":0.9381999969482422},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.707099974155426},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6365000009536743},{"id":"https://openalex.org/C68236139","wikidata":"https://www.wikidata.org/wiki/Q765652","display_name":"Sound localization","level":2,"score":0.5328999757766724},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.5029000043869019},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.48809999227523804},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.41830000281333923},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.40849998593330383},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.36469998955726624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35580000281333923},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.33230000734329224},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C103364767","wikidata":"https://www.wikidata.org/wiki/Q1807562","display_name":"Interaural time difference","level":3,"score":0.31520000100135803},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.3091999888420105},{"id":"https://openalex.org/C73208851","wikidata":"https://www.wikidata.org/wiki/Q5157303","display_name":"Computational auditory scene analysis","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C51432778","wikidata":"https://www.wikidata.org/wiki/Q1259145","display_name":"Independent component analysis","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.274399995803833},{"id":"https://openalex.org/C2777443451","wikidata":"https://www.wikidata.org/wiki/Q821413","display_name":"Auditory system","level":2,"score":0.26669999957084656}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lsp.2020.3043977","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2020.3043977","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2009.01381","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2009.01381","pdf_url":"https://arxiv.org/pdf/2009.01381","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2009.01381","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2009.01381","pdf_url":"https://arxiv.org/pdf/2009.01381","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1482149378","https://openalex.org/W1552314771","https://openalex.org/W1614886494","https://openalex.org/W1677182931","https://openalex.org/W1910234244","https://openalex.org/W1919482273","https://openalex.org/W1991139021","https://openalex.org/W2018541726","https://openalex.org/W2031908730","https://openalex.org/W2039983426","https://openalex.org/W2108668360","https://openalex.org/W2129171989","https://openalex.org/W2166682639","https://openalex.org/W2221409856","https://openalex.org/W2402058645","https://openalex.org/W2460742184","https://openalex.org/W2516001803","https://openalex.org/W2558649592","https://openalex.org/W2734774145","https://openalex.org/W2797579321","https://openalex.org/W2891133971","https://openalex.org/W2891405874","https://openalex.org/W2915654125","https://openalex.org/W2952218014","https://openalex.org/W2962715207","https://openalex.org/W2962905190","https://openalex.org/W2963317762","https://openalex.org/W2963443859","https://openalex.org/W2972460025","https://openalex.org/W3015199127","https://openalex.org/W3015279216","https://openalex.org/W3026111682","https://openalex.org/W3097906045","https://openalex.org/W6739901393","https://openalex.org/W6747620207","https://openalex.org/W6751512325","https://openalex.org/W6771792932","https://openalex.org/W6773419339","https://openalex.org/W6774687970","https://openalex.org/W6774995033"],"related_works":[],"abstract_inverted_index":{"Most":[0],"existing":[1],"deep":[2],"learning":[3],"based":[4],"binaural":[5,47,57,91,118],"speaker":[6,48],"separation":[7,49,70,113,119],"systems":[8],"focus":[9],"on":[10],"producing":[11],"a":[12,62,116],"monaural":[13,69],"estimate":[14],"for":[15,32,68],"each":[16],"of":[17,93,98,134],"the":[18,26,55,90,94,99,127,132],"target":[19],"speakers,":[20],"and":[21,39,76],"thus":[22],"do":[23],"not":[24],"preserve":[25],"interaural":[27,51,128],"cues,":[28,129],"which":[29,86,130],"are":[30],"crucial":[31],"human":[33],"listeners":[34],"to":[35,96],"perform":[36],"sound":[37,135],"localization":[38],"lateralization.":[40],"In":[41,121],"this":[42],"study,":[43],"we":[44,60],"address":[45],"talker-independent":[46],"with":[50],"cues":[52],"preserved":[53],"in":[54],"estimated":[56],"signals.":[58,101],"Specifically,":[59],"extend":[61],"newly-developed":[63],"gated":[64],"recurrent":[65],"neural":[66],"network":[67],"by":[71],"additionally":[72],"incorporating":[73],"self-attention":[74],"mechanisms":[75],"dense":[77],"connectivity.":[78],"We":[79],"develop":[80],"an":[81],"end-to-end":[82],"multiple-input":[83],"multiple-output":[84],"system,":[85],"directly":[87],"maps":[88],"from":[89],"waveform":[92],"mixture":[95],"those":[97],"speech":[100],"The":[102],"experimental":[103],"results":[104],"show":[105],"that":[106],"our":[107,123],"proposed":[108],"approach":[109,124],"achieves":[110],"significantly":[111],"better":[112],"performance":[114],"than":[115],"recent":[117],"approach.":[120],"addition,":[122],"effectively":[125],"preserves":[126],"improves":[131],"accuracy":[133],"localization.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-09-08T00:00:00"}
