{"id":"https://openalex.org/W2921997986","doi":"https://doi.org/10.1109/icce.2019.8662086","title":"Deep Neural Network-based Speech Separation Combining with MVDR Beamformer for Automatic Speech Recognition System","display_name":"Deep Neural Network-based Speech Separation Combining with MVDR Beamformer for Automatic Speech Recognition System","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2921997986","doi":"https://doi.org/10.1109/icce.2019.8662086","mag":"2921997986"},"language":"en","primary_location":{"id":"doi:10.1109/icce.2019.8662086","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icce.2019.8662086","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Consumer Electronics (ICCE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045678305","display_name":"Bong\u2010Ki Lee","orcid":"https://orcid.org/0000-0002-4289-2284"},"institutions":[{"id":"https://openalex.org/I4210131320","display_name":"LG (South Korea)","ror":"https://ror.org/03ddh2c27","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210131320"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Bong-Ki Lee","raw_affiliation_strings":["Artificial Intelligence Lab., LG Electronics Co., Ltd., Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Lab., LG Electronics Co., Ltd., Seoul, South Korea","institution_ids":["https://openalex.org/I4210131320"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015852282","display_name":"Jae\u2010Woong Jeong","orcid":"https://orcid.org/0000-0001-7607-5453"},"institutions":[{"id":"https://openalex.org/I4210131320","display_name":"LG (South Korea)","ror":"https://ror.org/03ddh2c27","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210131320"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaewoong Jeong","raw_affiliation_strings":["Artificial Intelligence Lab., LG Electronics Co., Ltd., Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Lab., LG Electronics Co., Ltd., Seoul, South Korea","institution_ids":["https://openalex.org/I4210131320"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5045678305"],"corresponding_institution_ids":["https://openalex.org/I4210131320"],"apc_list":null,"apc_paid":null,"fwci":0.8293,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.71578495,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7333246469497681},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7082121968269348},{"id":"https://openalex.org/keywords/beamforming","display_name":"Beamforming","score":0.661503791809082},{"id":"https://openalex.org/keywords/adaptive-beamformer","display_name":"Adaptive beamformer","score":0.5853034853935242},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5723262429237366},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.5128762722015381},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.49650460481643677},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.49085408449172974},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.48648616671562195},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.47940871119499207},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.3909735083580017},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30294743180274963},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1816515028476715},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08351492881774902}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7333246469497681},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7082121968269348},{"id":"https://openalex.org/C54197355","wikidata":"https://www.wikidata.org/wiki/Q5782992","display_name":"Beamforming","level":2,"score":0.661503791809082},{"id":"https://openalex.org/C33378366","wikidata":"https://www.wikidata.org/wiki/Q4680719","display_name":"Adaptive beamformer","level":3,"score":0.5853034853935242},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5723262429237366},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.5128762722015381},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.49650460481643677},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49085408449172974},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.48648616671562195},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.47940871119499207},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.3909735083580017},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30294743180274963},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1816515028476715},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08351492881774902},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icce.2019.8662086","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icce.2019.8662086","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Consumer Electronics (ICCE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1992475611","https://openalex.org/W2044893557","https://openalex.org/W2069681747","https://openalex.org/W2078528584","https://openalex.org/W2160815625","https://openalex.org/W2398042854","https://openalex.org/W2718052359","https://openalex.org/W2784052451"],"related_works":["https://openalex.org/W4389082013","https://openalex.org/W2767070583","https://openalex.org/W2380771148","https://openalex.org/W2784052451","https://openalex.org/W2538903081","https://openalex.org/W2389562147","https://openalex.org/W2109356272","https://openalex.org/W4386746628","https://openalex.org/W2348931051","https://openalex.org/W2132495960"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,15,23,70,124],"speech":[4,26,38,71,77,90,139,176],"enhancement":[5,72],"system":[6,194],"based":[7],"on":[8,101],"deep":[9],"neural":[10],"network":[11],"(DNN)":[12],"combined":[13],"with":[14,45],"minimum":[16],"variance":[17],"distortionless":[18],"response":[19],"(MVDR)":[20],"beamformer":[21,150],"for":[22,119],"noise-robust":[24],"automatic":[25],"recognizer":[27],"(ASR).":[28],"Although":[29],"adaptive":[30],"beamforming":[31,81,180],"algorithms":[32,82],"have":[33,43],"been":[34],"proposed":[35,170],"to":[36,83,151],"improve":[37,84],"recognition":[39],"performance,":[40],"they":[41],"still":[42],"problem":[44],"performance":[46],"degradation":[47],"in":[48,65,189,195],"low":[49],"signal-to-noise":[50],"ratio":[51,96],"(SNR)":[52],"and":[53,61,79,156,178],"speech-like":[54],"noisy":[55],"environments":[56],"such":[57],"as":[58],"babble,":[59],"music,":[60],"TV":[62],"noises.":[63],"Therefore,":[64],"this":[66],"study,":[67],"we":[68],"propose":[69],"algorithm":[73,92],"that":[74,168],"combines":[75,173],"DNN-based":[76,89,138,175],"separation":[78,91,140,177],"MVDR":[80,149,179,187],"the":[85,102,110,114,128,133,137,153,159,164,169,174,190],"ASR":[86,193],"performance.":[87],"First,":[88],"using":[93,123,163],"an":[94,144],"ideal":[95,145],"mask":[97,147],"(IRM)":[98],"is":[99,116,161,182],"performed":[100],"signals":[103,135],"captured":[104],"from":[105],"each":[106,120],"microphone.":[107],"For":[108],"this,":[109],"IRM":[111],"estimated":[112,129,162],"by":[113,143],"DNN":[115,185],"applied":[117],"independently":[118],"microphone":[121],"channel":[122],"mean":[125],"value":[126],"of":[127,136],"masks.":[130],"After":[131],"that,":[132],"output":[134],"are":[141],"processed":[142],"binary":[146],"(IBM)-based":[148],"reduce":[152],"residual":[154],"noise":[155],"reverberation":[157],"where":[158],"IBM":[160],"DNN.":[165],"Experiments":[166],"show":[167],"algorithm,":[171],"which":[172],"algorithms,":[181],"better":[183],"than":[184],"or":[186],"alone":[188],"QVoice,":[191],"LG's":[192],"G7":[196],"smartphone.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-02-25T23:00:34.991745","created_date":"2025-10-10T00:00:00"}
