{"id":"https://openalex.org/W2897977894","doi":"https://doi.org/10.1109/taslp.2018.2876169","title":"Robust Speaker Localization Guided by Deep Learning-Based Time-Frequency Masking","display_name":"Robust Speaker Localization Guided by Deep Learning-Based Time-Frequency Masking","publication_year":2018,"publication_date":"2018-10-15","ids":{"openalex":"https://openalex.org/W2897977894","doi":"https://doi.org/10.1109/taslp.2018.2876169","mag":"2897977894"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2018.2876169","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2876169","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101607498","display_name":"Zhong-Qiu Wang","orcid":"https://orcid.org/0000-0002-4204-9430"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhong-Qiu Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693230","display_name":"Xueliang Zhang","orcid":"https://orcid.org/0000-0002-0406-1105"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueliang Zhang","raw_affiliation_strings":["Department of Computer Science, Inner Mongolia University, Hohhot, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Inner Mongolia University, Hohhot, China","institution_ids":["https://openalex.org/I2722730"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051837453","display_name":"DeLiang Wang","orcid":"https://orcid.org/0000-0001-8195-6319"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"DeLiang Wang","raw_affiliation_strings":["Department of Computer Science and Engineering and the Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering and the Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101607498"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":8.7526,"has_fulltext":false,"cited_by_count":111,"citation_normalized_percentile":{"value":0.98508254,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"27","issue":"1","first_page":"178","last_page":"188"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11698","display_name":"Underwater Acoustics Research","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1910","display_name":"Oceanography"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monaural","display_name":"Monaural","score":0.8896481990814209},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7502528429031372},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7047914862632751},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6565213203430176},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.6019390821456909},{"id":"https://openalex.org/keywords/direction-of-arrival","display_name":"Direction of arrival","score":0.509702205657959},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.4324502646923065},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4255931079387665},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41151174902915955},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.406051903963089},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3993268311023712},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.21188876032829285},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.12854960560798645},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1222984790802002},{"id":"https://openalex.org/keywords/antenna","display_name":"Antenna (radio)","score":0.06812363862991333},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.06463825702667236}],"concepts":[{"id":"https://openalex.org/C102894143","wikidata":"https://www.wikidata.org/wiki/Q1323979","display_name":"Monaural","level":2,"score":0.8896481990814209},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7502528429031372},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7047914862632751},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6565213203430176},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.6019390821456909},{"id":"https://openalex.org/C172051844","wikidata":"https://www.wikidata.org/wiki/Q5280438","display_name":"Direction of arrival","level":3,"score":0.509702205657959},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.4324502646923065},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4255931079387665},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41151174902915955},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.406051903963089},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3993268311023712},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.21188876032829285},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.12854960560798645},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1222984790802002},{"id":"https://openalex.org/C21822782","wikidata":"https://www.wikidata.org/wiki/Q131214","display_name":"Antenna (radio)","level":2,"score":0.06812363862991333},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.06463825702667236},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2018.2876169","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2876169","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4666743193","display_name":null,"funder_award_id":"FA8750-15-1-0279","funder_id":"https://openalex.org/F4320338294","funder_display_name":"Air Force Research Laboratory"},{"id":"https://openalex.org/G8888347223","display_name":null,"funder_award_id":"IIS-1409431","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338294","display_name":"Air Force Research Laboratory","ror":"https://ror.org/02e2egq70"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W160800111","https://openalex.org/W1482149378","https://openalex.org/W1516630152","https://openalex.org/W1555217905","https://openalex.org/W1562552815","https://openalex.org/W1603075283","https://openalex.org/W1897240248","https://openalex.org/W1964538581","https://openalex.org/W1964998538","https://openalex.org/W1971405469","https://openalex.org/W1993678034","https://openalex.org/W1996304098","https://openalex.org/W2034040413","https://openalex.org/W2046317813","https://openalex.org/W2056522351","https://openalex.org/W2064675550","https://openalex.org/W2069681747","https://openalex.org/W2093010905","https://openalex.org/W2100818340","https://openalex.org/W2104422351","https://openalex.org/W2113638573","https://openalex.org/W2113679114","https://openalex.org/W2129188305","https://openalex.org/W2136484266","https://openalex.org/W2137867936","https://openalex.org/W2148613904","https://openalex.org/W2150384167","https://openalex.org/W2155323221","https://openalex.org/W2168379380","https://openalex.org/W2218753925","https://openalex.org/W2288645994","https://openalex.org/W2289394825","https://openalex.org/W2289480995","https://openalex.org/W2291877678","https://openalex.org/W2364134690","https://openalex.org/W2405404760","https://openalex.org/W2533459608","https://openalex.org/W2559260703","https://openalex.org/W2561557072","https://openalex.org/W2568308529","https://openalex.org/W2611943505","https://openalex.org/W2678916739","https://openalex.org/W2698117193","https://openalex.org/W2701869962","https://openalex.org/W2718052359","https://openalex.org/W2749335474","https://openalex.org/W2765962757","https://openalex.org/W2773475413","https://openalex.org/W2889426390","https://openalex.org/W2962708126","https://openalex.org/W2962866211","https://openalex.org/W3104757150","https://openalex.org/W4231807801","https://openalex.org/W4256399001","https://openalex.org/W6639532686","https://openalex.org/W6679591817","https://openalex.org/W6680448475","https://openalex.org/W6713369370","https://openalex.org/W6743302288","https://openalex.org/W6754870167"],"related_works":["https://openalex.org/W2036157531","https://openalex.org/W2056406069","https://openalex.org/W1518859147","https://openalex.org/W2621851636","https://openalex.org/W1614994442","https://openalex.org/W1976239252","https://openalex.org/W3214716754","https://openalex.org/W2003355886","https://openalex.org/W2538939196","https://openalex.org/W3045520545"],"abstract_inverted_index":{"Deep":[0],"learning-based":[1],"time-frequency":[2],"(T-F)":[3],"masking":[4,36],"has":[5],"dramatically":[6],"advanced":[7],"monaural":[8,91,115],"(single-channel)":[9],"speech":[10,74],"separation":[11],"and":[12,27,37,59,94,136,156],"enhancement.":[13],"This":[14],"study":[15,148],"investigates":[16],"its":[17,157],"potential":[18],"for":[19,82,119,163],"direction":[20],"of":[21,33,107,126],"arrival":[22],"(DOA)":[23],"estimation":[24,142],"in":[25,110,130],"noisy":[26],"reverberant":[28],"environments.":[29],"We":[30],"explore":[31],"ways":[32],"combining":[34],"T-F":[35,76],"conventional":[38],"localization":[39],"algorithms,":[40],"such":[41],"as":[42,49,51],"generalized":[43],"cross":[44],"correlation":[45],"with":[46,104,133],"phase":[47,81],"transform,":[48],"well":[50],"newly":[52],"proposed":[53,128],"algorithms":[54],"based":[55],"on":[56],"steered-response":[57],"SNR":[58],"steering":[60],"vectors.":[61],"The":[62],"key":[63],"idea":[64],"is":[65,87,117],"to":[66,72,102],"utilize":[67],"deep":[68],"neural":[69],"networks":[70],"(DNNs)":[71],"identify":[73],"dominant":[75],"units":[77],"containing":[78],"relatively":[79],"clean":[80],"DOA":[83,141],"estimation.":[84],"Our":[85,147],"DNN":[86],"trained":[88,98],"using":[89],"only":[90,114],"spectral":[92],"information,":[93],"this":[95],"makes":[96],"the":[97,127,152],"model":[99],"directly":[100],"applicable":[101],"arrays":[103],"various":[105],"numbers":[106],"microphones":[108],"arranged":[109],"diverse":[111],"geometries.":[112],"Although":[113],"information":[116],"used":[118],"training,":[120],"experimental":[121],"results":[122],"show":[123],"strong":[124],"robustness":[125],"approach":[129],"new":[131],"environments":[132],"intense":[134],"noise":[135],"room":[137],"reverberation,":[138],"outperforming":[139],"traditional":[140],"methods":[143],"by":[144],"large":[145],"margins.":[146],"also":[149],"suggests":[150],"that":[151],"ideal":[153],"ratio":[154],"mask":[155],"variants":[158],"remain":[159],"effective":[160],"training":[161],"targets":[162],"robust":[164],"speaker":[165],"localization.":[166]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":15},{"year":2022,"cited_by_count":20},{"year":2021,"cited_by_count":25},{"year":2020,"cited_by_count":15},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
