{"id":"https://openalex.org/W2963301902","doi":"https://doi.org/10.1109/asru.2017.8268969","title":"Cracking the cocktail party problem by multi-beam deep attractor network","display_name":"Cracking the cocktail party problem by multi-beam deep attractor network","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2963301902","doi":"https://doi.org/10.1109/asru.2017.8268969","mag":"2963301902"},"language":"en","primary_location":{"id":"doi:10.1109/asru.2017.8268969","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2017.8268969","pdf_url":null,"source":{"id":"https://openalex.org/S4306498158","display_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106557560","display_name":"Zhuo Chen","orcid":"https://orcid.org/0000-0002-8483-1578"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Zhuo Chen","raw_affiliation_strings":["Microsoft AI and Research"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft AI and Research"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101602536","display_name":"Xiong Xiao","orcid":"https://orcid.org/0009-0001-5128-6518"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Xiong Xiao","raw_affiliation_strings":["Microsoft AI and Research"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101618071","display_name":"Takuya Yoshioka","orcid":"https://orcid.org/0009-0003-7791-3545"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Takuya Yoshioka","raw_affiliation_strings":["Microsoft AI and Research"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101528069","display_name":"Huaming Wang","orcid":"https://orcid.org/0000-0002-4434-7482"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Huaming Wang","raw_affiliation_strings":["Microsoft AI and Research"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101767353","display_name":"Zhenghao Wang","orcid":"https://orcid.org/0000-0002-0920-9041"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhenghao Wang","raw_affiliation_strings":["Microsoft AI and Research"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101928537","display_name":"Yifan Gong","orcid":"https://orcid.org/0000-0002-3912-097X"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yifan Gong","raw_affiliation_strings":["Microsoft AI and Research"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research","institution_ids":["https://openalex.org/I4210164937"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5106557560"],"corresponding_institution_ids":["https://openalex.org/I4210164937"],"apc_list":null,"apc_paid":null,"fwci":4.498,"has_fulltext":false,"cited_by_count":40,"citation_normalized_percentile":{"value":0.95808342,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"437","last_page":"444"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7689254283905029},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6263384819030762},{"id":"https://openalex.org/keywords/beamforming","display_name":"Beamforming","score":0.5813599824905396},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5579753518104553},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.49445652961730957},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.46797874569892883},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.46578162908554077},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.46541523933410645},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.46181073784828186},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.45324790477752686},{"id":"https://openalex.org/keywords/signal-to-noise-ratio","display_name":"Signal-to-noise ratio (imaging)","score":0.44865909218788147},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.4452993869781494},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.44404086470603943},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.4367111921310425},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3768148124217987},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2760786712169647},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.22644802927970886},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11981603503227234},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.08170071244239807}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7689254283905029},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6263384819030762},{"id":"https://openalex.org/C54197355","wikidata":"https://www.wikidata.org/wiki/Q5782992","display_name":"Beamforming","level":2,"score":0.5813599824905396},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5579753518104553},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49445652961730957},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.46797874569892883},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.46578162908554077},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.46541523933410645},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.46181073784828186},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.45324790477752686},{"id":"https://openalex.org/C13944312","wikidata":"https://www.wikidata.org/wiki/Q7512748","display_name":"Signal-to-noise ratio (imaging)","level":2,"score":0.44865909218788147},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.4452993869781494},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.44404086470603943},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.4367111921310425},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3768148124217987},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2760786712169647},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.22644802927970886},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11981603503227234},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.08170071244239807},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru.2017.8268969","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2017.8268969","pdf_url":null,"source":{"id":"https://openalex.org/S4306498158","display_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7400000095367432}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W37722415","https://openalex.org/W189850294","https://openalex.org/W210719916","https://openalex.org/W1964538581","https://openalex.org/W1980687383","https://openalex.org/W1984541135","https://openalex.org/W1991139021","https://openalex.org/W2012910847","https://openalex.org/W2013598660","https://openalex.org/W2013608223","https://openalex.org/W2027884847","https://openalex.org/W2031583051","https://openalex.org/W2043216213","https://openalex.org/W2044222806","https://openalex.org/W2060108923","https://openalex.org/W2069681747","https://openalex.org/W2076794394","https://openalex.org/W2079362249","https://openalex.org/W2096855653","https://openalex.org/W2117678320","https://openalex.org/W2121973264","https://openalex.org/W2141411743","https://openalex.org/W2160815625","https://openalex.org/W2221409856","https://openalex.org/W2293634267","https://openalex.org/W2294543795","https://openalex.org/W2296748324","https://openalex.org/W2394932179","https://openalex.org/W2396384435","https://openalex.org/W2398042854","https://openalex.org/W2398972335","https://openalex.org/W2402040300","https://openalex.org/W2460742184","https://openalex.org/W2465697055","https://openalex.org/W2558649592","https://openalex.org/W2561557072","https://openalex.org/W2589857635","https://openalex.org/W2714487941","https://openalex.org/W2735006420","https://openalex.org/W2735663686","https://openalex.org/W2749588430","https://openalex.org/W2749784707","https://openalex.org/W2962715207","https://openalex.org/W2962894366","https://openalex.org/W4256399001","https://openalex.org/W4285719527","https://openalex.org/W6697318756","https://openalex.org/W6719417910"],"related_works":["https://openalex.org/W2963170046","https://openalex.org/W2376244802","https://openalex.org/W1971477554","https://openalex.org/W2158075901","https://openalex.org/W818226659","https://openalex.org/W2923631784","https://openalex.org/W4311414679","https://openalex.org/W2130785056","https://openalex.org/W2962707588","https://openalex.org/W2118992737"],"abstract_inverted_index":{"While":[0],"recent":[1],"progresses":[2],"in":[3,145],"neural":[4],"network":[5,93],"approaches":[6],"to":[7,54,69,94,171],"singlechannel":[8],"speech":[9,79,146,192,219],"separation,":[10,147],"or":[11,128],"more":[12,76],"generally":[13],"the":[14,43,99,107,115,135,140,143,172,201,233],"cocktail":[15],"party":[16],"problem,":[17],"achieved":[18],"significant":[19],"improvement,":[20],"their":[21],"performance":[22,173],"for":[23,39,78,110,160],"complex":[24],"mixtures":[25,124],"is":[26,51,85,102,169,235],"still":[27],"not":[28],"satisfactory.":[29],"In":[30,42],"this":[31,65],"work,":[32],"we":[33,67,118],"propose":[34,68],"a":[35,55,88,120,175,195,228],"novel":[36],"multi-channel":[37,48],"framework":[38],"multi-talker":[40],"separation.":[41,80],"proposed":[44,116,136],"model,":[45,232],"an":[46],"input":[47],"mixture":[49],"signal":[50,84],"firstly":[52],"converted":[53],"set":[56],"of":[57,125,142,174,211,220],"beamformed":[58,83],"signals":[59],"using":[60],"fixed":[61],"beam":[62],"patterns.":[63],"For":[64],"beamforming,":[66],"use":[70],"differential":[71],"beamformers":[72],"as":[73],"they":[74],"are":[75],"suitable":[77],"Then":[81],"each":[82,111],"fed":[86],"into":[87],"single-channel":[89],"anchored":[90],"deep":[91],"attractor":[92],"generate":[95],"separated":[96,202],"signals.":[97],"And":[98],"final":[100],"separation":[101],"acquired":[103],"by":[104],"post":[105],"selecting":[106],"separating":[108],"output":[109],"beams.":[112],"To":[113],"evaluate":[114],"system,":[117],"create":[119],"challenging":[121],"dataset":[122],"comprising":[123],"2,":[126],"3":[127,162,222],"4":[129],"speakers.":[130],"Our":[131],"results":[132],"show":[133],"that":[134,181],"system":[137],"largely":[138],"improves":[139],"state":[141],"art":[144],"achieving":[148,204],"11.5":[149],"dB,":[150],"11.76":[151],"dB":[152,155],"and":[153,163,186,214,223],"11.02":[154],"average":[156],"signal-to-distortion":[157],"ratio":[158],"improvement":[159],"4,":[161,221],"2":[164,224],"overlapped":[165,218],"speaker":[166],"mixtures,":[167],"which":[168],"comparable":[170],"minimum":[176],"variance":[177],"distortionless":[178],"response":[179],"beamformer":[180],"uses":[182],"oracle":[183],"location,":[184],"source,":[185],"noise":[187],"information.":[188],"We":[189],"also":[190],"run":[191],"recognition":[193],"with":[194],"clean":[196],"trained":[197],"acoustic":[198,231],"model":[199],"on":[200,216],"speech,":[203],"relative":[205],"word":[206],"error":[207],"rate":[208],"(WER)":[209],"reduction":[210],"45.76%,":[212],"59.40%":[213],"62.80%":[215],"fully":[217],"speakers,":[225],"respectively.":[226],"With":[227],"far":[229],"talk":[230],"WER":[234],"further":[236],"reduced.":[237]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":7}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
