{"id":"https://openalex.org/W4380715308","doi":"https://doi.org/10.21437/interspeech.2023-232","title":"Unsupervised speech enhancement with deep dynamical generative speech and noise models","display_name":"Unsupervised speech enhancement with deep dynamical generative speech and noise models","publication_year":2023,"publication_date":"2023-08-14","ids":{"openalex":"https://openalex.org/W4380715308","doi":"https://doi.org/10.21437/interspeech.2023-232"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2023-232","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-232","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2306.07820","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087031558","display_name":"Xiaoyu Lin","orcid":"https://orcid.org/0000-0001-7486-9452"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Xiaoyu Lin","raw_affiliation_strings":["ROBOTLEARN - Vers des robots \u00e0 l\u2019intelligence sociale au travers de l\u2019apprentissage, de la perception et de la commande (INRIA Rh\u00f4ne-Alpes 655 avenue de l'Europe 38330 Montbonnot - France)"],"affiliations":[{"raw_affiliation_string":"ROBOTLEARN - Vers des robots \u00e0 l\u2019intelligence sociale au travers de l\u2019apprentissage, de la perception et de la commande (INRIA Rh\u00f4ne-Alpes 655 avenue de l'Europe 38330 Montbonnot - France)","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045593473","display_name":"Simon Leglaive","orcid":"https://orcid.org/0000-0002-8219-1298"},"institutions":[{"id":"https://openalex.org/I4210100151","display_name":"Institut d'\u00c9lectronique et des Technologies du num\u00e9Rique","ror":"https://ror.org/013q33h79","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I28221208","https://openalex.org/I4210095849","https://openalex.org/I4210100151","https://openalex.org/I56067802","https://openalex.org/I97188460"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Simon Leglaive","raw_affiliation_strings":["IETR - Institut d'\u00c9lectronique et des Technologies du num\u00e9Rique (Campus de Beaulieu B\u00e2timent 11D 263 Av.G\u00e9n\u00e9ral Leclerc-CS 74205 35042 Rennes Cedex - France)"],"affiliations":[{"raw_affiliation_string":"IETR - Institut d'\u00c9lectronique et des Technologies du num\u00e9Rique (Campus de Beaulieu B\u00e2timent 11D 263 Av.G\u00e9n\u00e9ral Leclerc-CS 74205 35042 Rennes Cedex - France)","institution_ids":["https://openalex.org/I4210100151"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020392160","display_name":"Laurent Girin","orcid":"https://orcid.org/0000-0002-9214-8760"},"institutions":[{"id":"https://openalex.org/I4210124956","display_name":"Grenoble Images Parole Signal Automatique","ror":"https://ror.org/02wrme198","country_code":"FR","type":"facility","lineage":["https://openalex.org/I106785703","https://openalex.org/I1294671590","https://openalex.org/I4210124956","https://openalex.org/I899635006","https://openalex.org/I899635006"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Laurent Girin","raw_affiliation_strings":["GIPSA-CRISSP - GIPSA - Cognitive Robotics, Interactive Systems, & Speech Processing (GIPSA-lab, 11 rue des Math\u00e9matiques, Grenoble Campus BP46, F-38402 SAINT MARTIN D'HERES CEDEX - France)"],"affiliations":[{"raw_affiliation_string":"GIPSA-CRISSP - GIPSA - Cognitive Robotics, Interactive Systems, & Speech Processing (GIPSA-lab, 11 rue des Math\u00e9matiques, Grenoble Campus BP46, F-38402 SAINT MARTIN D'HERES CEDEX - France)","institution_ids":["https://openalex.org/I4210124956"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066621495","display_name":"Xavier Alameda-Pineda","orcid":"https://orcid.org/0000-0002-5354-1084"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Xavier Alameda-Pineda","raw_affiliation_strings":["ROBOTLEARN - Vers des robots \u00e0 l\u2019intelligence sociale au travers de l\u2019apprentissage, de la perception et de la commande (INRIA Rh\u00f4ne-Alpes 655 avenue de l'Europe 38330 Montbonnot - France)"],"affiliations":[{"raw_affiliation_string":"ROBOTLEARN - Vers des robots \u00e0 l\u2019intelligence sociale au travers de l\u2019apprentissage, de la perception et de la commande (INRIA Rh\u00f4ne-Alpes 655 avenue de l'Europe 38330 Montbonnot - France)","institution_ids":["https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5087031558"],"corresponding_institution_ids":["https://openalex.org/I1326498283"],"apc_list":null,"apc_paid":null,"fwci":0.9854,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74891748,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"5102","last_page":"5106"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13289","display_name":"Infant Health and Development","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/3611","display_name":"Pharmacy"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.7578349709510803},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7204792499542236},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7007488012313843},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6791406273841858},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.6765021681785583},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.61305171251297},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5344094038009644},{"id":"https://openalex.org/keywords/noise-measurement","display_name":"Noise measurement","score":0.5183702111244202},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5155153274536133},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4810902774333954},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.4324612617492676},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.408141553401947},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.34163978695869446},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3003939390182495},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.2262265682220459},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.18648293614387512},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.0637119710445404}],"concepts":[{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.7578349709510803},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7204792499542236},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7007488012313843},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6791406273841858},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.6765021681785583},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.61305171251297},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5344094038009644},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.5183702111244202},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5155153274536133},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4810902774333954},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.4324612617492676},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.408141553401947},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.34163978695869446},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3003939390182495},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.2262265682220459},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.18648293614387512},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0637119710445404},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2023-232","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-232","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2306.07820","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.07820","pdf_url":"https://arxiv.org/pdf/2306.07820","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:HAL:hal-04132312v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-04132312","pdf_url":null,"source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://interspeech2023.org/","raw_type":"Conference papers"},{"id":"doi:10.48550/arxiv.2306.07820","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2306.07820","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2306.07820","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.07820","pdf_url":"https://arxiv.org/pdf/2306.07820","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7599999904632568,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4380715308.pdf"},"referenced_works_count":38,"referenced_works":["https://openalex.org/W891534129","https://openalex.org/W1495679096","https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W2039844283","https://openalex.org/W2094721231","https://openalex.org/W2141998673","https://openalex.org/W2149273154","https://openalex.org/W2603567530","https://openalex.org/W2766672686","https://openalex.org/W2901552243","https://openalex.org/W2911579794","https://openalex.org/W2943237054","https://openalex.org/W2949558265","https://openalex.org/W2962866211","https://openalex.org/W2963263347","https://openalex.org/W2963341071","https://openalex.org/W2964058413","https://openalex.org/W2972411915","https://openalex.org/W3031135612","https://openalex.org/W3087443678","https://openalex.org/W3100968126","https://openalex.org/W3130335839","https://openalex.org/W3147966746","https://openalex.org/W3174264304","https://openalex.org/W3186301694","https://openalex.org/W3217536461","https://openalex.org/W4221144097","https://openalex.org/W4225263883","https://openalex.org/W4232282348","https://openalex.org/W4287120585","https://openalex.org/W4287236406","https://openalex.org/W4288089383","https://openalex.org/W4295122182","https://openalex.org/W4295177495","https://openalex.org/W4303493829","https://openalex.org/W4311841227","https://openalex.org/W4380434618"],"related_works":["https://openalex.org/W2919389044","https://openalex.org/W1997528538","https://openalex.org/W2777466939","https://openalex.org/W2889693761","https://openalex.org/W2890579888","https://openalex.org/W1966856063","https://openalex.org/W2900450731","https://openalex.org/W2593427229","https://openalex.org/W2002243964","https://openalex.org/W2022538999"],"abstract_inverted_index":{"This":[0,61],"work":[1,6],"builds":[2],"on":[3,7,48,54,59],"a":[4,12,40,100],"previous":[5],"unsupervised":[8,90],"speech":[9,20,91],"enhancement":[10,92],"using":[11],"dynamical":[13,42],"variational":[14],"autoencoder":[15],"(DVAE)":[16],"as":[17,27],"the":[18,28,35,49,55,81,95],"clean":[19],"model":[21,38,44],"and":[22,71],"non-negative":[23],"matrix":[24],"factorization":[25],"(NMF)":[26],"noise":[29,37,72],"model.":[30],"We":[31],"propose":[32],"to":[33,88],"replace":[34],"NMF":[36],"with":[39],"deep":[41],"generative":[43],"(DDGM)":[45],"depending":[46],"either":[47],"DVAE":[50],"latent":[51],"variables,":[52],"or":[53,58],"noisy":[56],"observations,":[57],"both.":[60],"DDGM":[62],"can":[63],"be":[64],"trained":[65],"in":[66],"three":[67],"configurations:":[68],"noise-agnostic,":[69],"noise-dependent":[70,75,96],"adaptation":[73],"after":[74],"training.":[76],"Experimental":[77],"results":[78],"show":[79],"that":[80],"proposed":[82],"method":[83],"achieves":[84],"competitive":[85],"performance":[86],"compared":[87],"state-of-the-art":[89],"methods,":[93],"while":[94],"training":[97],"configuration":[98],"yields":[99],"much":[101],"more":[102],"time-efficient":[103],"inference":[104],"process.":[105]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2023-06-15T00:00:00"}
