{"id":"https://openalex.org/W2396837968","doi":"https://doi.org/10.1109/icassp.2016.7471719","title":"Exploiting spectro-temporal structures using NMF for DNN-based supervised speech separation","display_name":"Exploiting spectro-temporal structures using NMF for DNN-based supervised speech separation","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2396837968","doi":"https://doi.org/10.1109/icassp.2016.7471719","mag":"2396837968"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7471719","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7471719","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068044387","display_name":"Shuai Nie","orcid":"https://orcid.org/0000-0002-8078-6829"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuai Nie","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108575841","display_name":"Shan Liang","orcid":"https://orcid.org/0000-0002-9734-9166"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shan Liang","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100348614","display_name":"Hao Li","orcid":"https://orcid.org/0000-0002-5963-4505"},"institutions":[{"id":"https://openalex.org/I64222039","display_name":"National University of Mongolia","ror":"https://ror.org/04855bv47","country_code":"MN","type":"education","lineage":["https://openalex.org/I64222039"]},{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN","MN"],"is_corresponding":false,"raw_author_name":"Hao Li","raw_affiliation_strings":["College of Computer Science, Inner Mongolia University"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Inner Mongolia University","institution_ids":["https://openalex.org/I64222039","https://openalex.org/I2722730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693230","display_name":"Xueliang Zhang","orcid":"https://orcid.org/0000-0002-0406-1105"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]},{"id":"https://openalex.org/I64222039","display_name":"National University of Mongolia","ror":"https://ror.org/04855bv47","country_code":"MN","type":"education","lineage":["https://openalex.org/I64222039"]}],"countries":["CN","MN"],"is_corresponding":false,"raw_author_name":"XueLiang Zhang","raw_affiliation_strings":["College of Computer Science, Inner Mongolia University"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Inner Mongolia University","institution_ids":["https://openalex.org/I64222039","https://openalex.org/I2722730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084649692","display_name":"Zhanlei Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"ZhanLei Yang","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027296897","display_name":"Wen Ju Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Ju Liu","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103677421","display_name":"Li Ke Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li Ke Dong","raw_affiliation_strings":["China State Grid Corp, Electric Power Research Institute of ShanXi Electric Power Company"],"affiliations":[{"raw_affiliation_string":"China State Grid Corp, Electric Power Research Institute of ShanXi Electric Power Company","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5068044387"],"corresponding_institution_ids":["https://openalex.org/I19820366"],"apc_list":null,"apc_paid":null,"fwci":1.2609,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.80100036,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"469","last_page":"473"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9653372764587402},{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.8935552835464478},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7214412689208984},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6972038149833679},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6170507669448853},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5574048757553101},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5526596903800964},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5378175973892212},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.49064913392066956},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.48426884412765503},{"id":"https://openalex.org/keywords/basis","display_name":"Basis (linear algebra)","score":0.478633314371109},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.47550803422927856},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47255584597587585},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.41814762353897095},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20070159435272217},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.1703680455684662},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.12848666310310364}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9653372764587402},{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.8935552835464478},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7214412689208984},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6972038149833679},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6170507669448853},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5574048757553101},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5526596903800964},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5378175973892212},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.49064913392066956},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.48426884412765503},{"id":"https://openalex.org/C12426560","wikidata":"https://www.wikidata.org/wiki/Q189569","display_name":"Basis (linear algebra)","level":2,"score":0.478633314371109},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.47550803422927856},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47255584597587585},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.41814762353897095},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20070159435272217},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.1703680455684662},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.12848666310310364},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2016.7471719","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7471719","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1516630152","https://openalex.org/W1552314771","https://openalex.org/W1555814299","https://openalex.org/W1831449718","https://openalex.org/W1902027874","https://openalex.org/W1963970749","https://openalex.org/W1974387177","https://openalex.org/W1999736059","https://openalex.org/W2016891207","https://openalex.org/W2031647436","https://openalex.org/W2033481590","https://openalex.org/W2046869671","https://openalex.org/W2051434435","https://openalex.org/W2062470211","https://openalex.org/W2069681747","https://openalex.org/W2075036909","https://openalex.org/W2077799289","https://openalex.org/W2078528584","https://openalex.org/W2098950531","https://openalex.org/W2127851351","https://openalex.org/W2150866759","https://openalex.org/W2158291955","https://openalex.org/W2160215673","https://openalex.org/W2406185883","https://openalex.org/W2729906263","https://openalex.org/W4213193054","https://openalex.org/W4297801963","https://openalex.org/W4320013820","https://openalex.org/W6638670064","https://openalex.org/W6713859409"],"related_works":["https://openalex.org/W2551137307","https://openalex.org/W2113526703","https://openalex.org/W2123043102","https://openalex.org/W2577807713","https://openalex.org/W3094316140","https://openalex.org/W2547262076","https://openalex.org/W3133205200","https://openalex.org/W4289363934","https://openalex.org/W2563421448","https://openalex.org/W2098101267"],"abstract_inverted_index":{"The":[0],"targets":[1],"of":[2,11,50,63,85],"speech":[3,26,64,86,129],"separation,":[4,27],"whether":[5],"ideal":[6],"masks":[7],"or":[8],"magnitude":[9,51,83],"spectrograms":[10,84],"interest,":[12],"have":[13],"prominent":[14],"spectro-temporal":[15,48],"structures.":[16],"These":[17],"characteristics":[18],"are":[19,30,72],"very":[20],"worthy":[21],"to":[22,45,80],"be":[23],"exploited":[24],"for":[25,110],"however,":[28],"they":[29],"usually":[31],"ignored":[32],"in":[33,127],"previous":[34,125],"works.":[35],"In":[36],"this":[37],"paper,":[38],"we":[39,98],"use":[40],"nonnegative":[41,54,91],"matrix":[42],"factorization":[43],"(NMF)":[44],"exploit":[46],"the":[47,59,68,82,95,111,118,124],"structures":[49],"spectrograms.":[52],"With":[53],"constrains,":[55],"NMF":[56],"can":[57],"capture":[58],"basis":[60,70],"spectra":[61,71],"patterns":[62],"and":[65,87,105],"noise.":[66],"Then":[67],"learned":[69],"integrated":[73],"into":[74],"a":[75,101,106],"deep":[76],"neural":[77],"network":[78],"(DNN)":[79],"reconstruct":[81],"noise":[88],"with":[89,123],"their":[90],"linear":[92],"combination.":[93],"Using":[94],"reconstructed":[96],"spectrograms,":[97],"further":[99],"explore":[100],"discriminative":[102],"training":[103],"objective":[104],"joint":[107],"optimization":[108],"framework":[109],"proposed":[112,119],"model.":[113],"Systematic":[114],"experiments":[115],"show":[116],"that":[117],"model":[120],"is":[121],"competitive":[122],"methods":[126],"monaural":[128],"separation":[130],"tasks.":[131]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
