{"id":"https://openalex.org/W2401089611","doi":"https://doi.org/10.1109/icassp.2016.7471725","title":"Combining non-negative matrix factorization and deep neural networks for speech enhancement and automatic speech recognition","display_name":"Combining non-negative matrix factorization and deep neural networks for speech enhancement and automatic speech recognition","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2401089611","doi":"https://doi.org/10.1109/icassp.2016.7471725","mag":"2401089611"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7471725","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7471725","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025765444","display_name":"Thanh Tung Vu","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Thanh T. Vu","raw_affiliation_strings":["School of Computer Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074863085","display_name":"Benjamin Bigot","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Benjamin Bigot","raw_affiliation_strings":["Rolls-Royce@NTU Corporate Lab, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Rolls-Royce@NTU Corporate Lab, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070872826","display_name":"Eng Siong Chng","orcid":"https://orcid.org/0000-0001-6257-7399"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Eng Siong Chng","raw_affiliation_strings":["School of Computer Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5025765444"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":3.8314,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.94154892,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"499","last_page":"503"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.8011561036109924},{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.7584853172302246},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7407553195953369},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7368406057357788},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5386841297149658},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.4718572795391083},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.468650758266449},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4352370500564575},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3682962954044342},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0863199532032013},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.052869588136672974}],"concepts":[{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.8011561036109924},{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.7584853172302246},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7407553195953369},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7368406057357788},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5386841297149658},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.4718572795391083},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.468650758266449},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4352370500564575},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3682962954044342},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0863199532032013},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.052869588136672974},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2016.7471725","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7471725","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320709","display_name":"National Research Foundation Singapore","ror":"https://ror.org/03cpyc314"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1495679096","https://openalex.org/W1504438288","https://openalex.org/W1524333225","https://openalex.org/W1555814299","https://openalex.org/W1902027874","https://openalex.org/W2013139519","https://openalex.org/W2032252376","https://openalex.org/W2036245667","https://openalex.org/W2072128103","https://openalex.org/W2077799289","https://openalex.org/W2078528584","https://openalex.org/W2104104263","https://openalex.org/W2144404214","https://openalex.org/W2289394825","https://openalex.org/W2405589348","https://openalex.org/W4231109964","https://openalex.org/W4253928870","https://openalex.org/W6631362777"],"related_works":["https://openalex.org/W2127243424","https://openalex.org/W4390394189","https://openalex.org/W2037504162","https://openalex.org/W2539013788","https://openalex.org/W2792706544","https://openalex.org/W1568451138","https://openalex.org/W2156699640","https://openalex.org/W2045265907","https://openalex.org/W2972997031","https://openalex.org/W34555840"],"abstract_inverted_index":{"Sparse":[0],"Non-negative":[1],"Matrix":[2],"Factorization":[3],"(SNMF)":[4],"and":[5,34,39,59,130],"Deep":[6],"Neural":[7],"Networks":[8],"(DNN)":[9],"have":[10],"emerged":[11],"individually":[12],"as":[13],"two":[14],"efficient":[15],"machine":[16],"learning":[17],"techniques":[18],"for":[19,36,118,138],"single-channel":[20],"speech":[21,37,54,89,106,122],"enhancement.":[22],"Nevertheless,":[23],"there":[24],"are":[25,93],"only":[26],"few":[27],"works":[28],"investigating":[29],"the":[30,67,71,75,79,96,142],"combination":[31,52],"of":[32,53,70,78,98,103,128],"SNMF":[33,58],"DNN":[35,60,72],"enhancement":[38,55,90,123],"robust":[40],"Automatic":[41],"Speech":[42],"Recognition":[43],"(ASR).":[44],"In":[45],"this":[46],"paper,":[47],"we":[48],"present":[49],"a":[50,62,131],"novel":[51],"components":[56],"based-on":[57],"into":[61],"full-stack":[63],"system.":[64],"We":[65],"refine":[66],"cost":[68],"function":[69],"to":[73,141],"back-propagate":[74],"reconstruction":[76],"error":[77],"enhanced":[80],"speech.":[81],"Our":[82,113],"proposal":[83],"is":[84],"compared":[85,140],"with":[86,125],"several":[87],"state-of-the-art":[88],"systems.":[91],"Evaluations":[92],"conducted":[94],"on":[95],"data":[97],"CHiME-3":[99],"challenge":[100],"which":[101],"consists":[102],"real":[104],"noisy":[105,111],"recordings":[107],"captured":[108],"under":[109],"challenging":[110],"conditions.":[112],"system":[114],"yields":[115],"significant":[116],"improvements":[117],"both":[119],"objective":[120],"quality":[121],"measurements":[124],"relative":[126,133],"gain":[127],"30%,":[129],"10%":[132],"Word":[134],"Error":[135],"Rate":[136],"reduction":[137],"ASR":[139],"best":[143],"baselines.":[144]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-01T17:29:45.350535","created_date":"2025-10-10T00:00:00"}
