{"id":"https://openalex.org/W7141707942","doi":"https://doi.org/10.1109/icce67443.2026.11449839","title":"Silence-Aware AV-RVAE with Mala-Based Posterior Sampling for Speech Enhancement","display_name":"Silence-Aware AV-RVAE with Mala-Based Posterior Sampling for Speech Enhancement","publication_year":2026,"publication_date":"2026-02-03","ids":{"openalex":"https://openalex.org/W7141707942","doi":"https://doi.org/10.1109/icce67443.2026.11449839"},"language":null,"primary_location":{"id":"doi:10.1109/icce67443.2026.11449839","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icce67443.2026.11449839","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Conference on Consumer Electronics (ICCE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004014043","display_name":"Zohre Foroushi","orcid":"https://orcid.org/0000-0002-2801-7723"},"institutions":[{"id":"https://openalex.org/I67031392","display_name":"Carleton University","ror":"https://ror.org/02qtvee93","country_code":"CA","type":"education","lineage":["https://openalex.org/I67031392"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Z. Foroushi","raw_affiliation_strings":["Carleton University,Department of Systems &amp; Computer Engineering,Canada"],"affiliations":[{"raw_affiliation_string":"Carleton University,Department of Systems &amp; Computer Engineering,Canada","institution_ids":["https://openalex.org/I67031392"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130761097","display_name":"R. M. Dansereau","orcid":null},"institutions":[{"id":"https://openalex.org/I67031392","display_name":"Carleton University","ror":"https://ror.org/02qtvee93","country_code":"CA","type":"education","lineage":["https://openalex.org/I67031392"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"R. M. Dansereau","raw_affiliation_strings":["Carleton University,Department of Systems &amp; Computer Engineering,Canada"],"affiliations":[{"raw_affiliation_string":"Carleton University,Department of Systems &amp; Computer Engineering,Canada","institution_ids":["https://openalex.org/I67031392"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5004014043"],"corresponding_institution_ids":["https://openalex.org/I67031392"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.95095882,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9692999720573425,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9692999720573425,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.007600000128149986,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.005200000014156103,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4000999927520752},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.3734999895095825},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.36910000443458557},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.2904999852180481},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.27079999446868896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46059998869895935},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4000999927520752},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3772999942302704},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.3734999895095825},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.36910000443458557},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33660000562667847},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2721000015735626},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26930001378059387}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icce67443.2026.11449839","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icce67443.2026.11449839","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Conference on Consumer Electronics (ICCE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W2128653836","https://openalex.org/W2141998673","https://openalex.org/W2747876639","https://openalex.org/W2936446744","https://openalex.org/W2963082324","https://openalex.org/W2964058413","https://openalex.org/W2991015288","https://openalex.org/W3015791137","https://openalex.org/W3016011581","https://openalex.org/W3082851515","https://openalex.org/W3136499730","https://openalex.org/W3174264304","https://openalex.org/W4311841164","https://openalex.org/W4392903707","https://openalex.org/W4403127009","https://openalex.org/W4417072973"],"related_works":[],"abstract_inverted_index":{"We":[0,27],"address":[1],"two":[2],"issues":[3],"in":[4,72,76],"audio\u2013visual":[5],"(AV)":[6],"speech":[7],"enhancement":[8],"with":[9],"recurrent":[10],"VAEs":[11],"(AV-RVAE):":[12],"hallucinated":[13],"speech-like":[14],"energy":[15],"during":[16,63],"silence":[17,32],"periods":[18],"and":[19,42,57,65,96,110,117],"costly":[20],"MAP":[21,75],"inference":[22],"that":[23],"under-represents":[24],"posterior":[25],"uncertainty.":[26],"propose":[28],"(i)":[29],"an":[30],"AV":[31],"detector":[33],"via":[34],"strict-AND":[35],"fusion":[36],"of":[37,74],"voice":[38],"activity":[39],"detection":[40],"(VAD)":[41],"a":[43,48,60],"lip-embedding":[44],"motion":[45],"score,":[46],"(ii)":[47],"silence-aware":[49],"latent":[50],"noise":[51,94],"prototype":[52],"estimated":[53],"from":[54],"detected-silence":[55],"frames":[56],"injected":[58],"as":[59],"light":[61],"shrink":[62],"inference,":[64],"(iii)":[66],"Metropolis-Adjusted":[67],"Langevin":[68],"Algorithm":[69],"(MALA)":[70],"sampling":[71],"place":[73],"the":[77,83,89,99,104],"E-step.":[78],"The":[79],"method":[80],"drops":[81],"into":[82],"standard":[84],"AV-RVAE+NMF":[85],"framework":[86],"without":[87,120],"modifying":[88],"decoder.On":[90],"NTCD\u2013TIMIT":[91],"across":[92],"six":[93],"conditions":[95],"five":[97],"SNRs,":[98],"proposed":[100],"system":[101],"consistently":[102],"outperforms":[103],"AV-RVAE":[105],"baseline":[106],"on":[107],"SI-SDR,":[108],"PESQ,":[109],"STOI.":[111],"Qualitatively,":[112],"it":[113],"produces":[114],"cleaner":[115],"silences":[116],"sharper":[118],"formants":[119],"introducing":[121],"artifacts.":[122]},"counts_by_year":[],"updated_date":"2026-03-29T06:01:01.467347","created_date":"2026-03-28T00:00:00"}
