{"id":"https://openalex.org/W4372348432","doi":"https://doi.org/10.1109/icassp49357.2023.10095818","title":"Learning Audio-Visual Dereverberation","display_name":"Learning Audio-Visual Dereverberation","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372348432","doi":"https://doi.org/10.1109/icassp49357.2023.10095818"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054262111","display_name":"Changan Chen","orcid":"https://orcid.org/0009-0002-2926-5419"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Changan Chen","raw_affiliation_strings":["UT Austin","FAIR, Meta AI"],"affiliations":[{"raw_affiliation_string":"UT Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"FAIR, Meta AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044700284","display_name":"Wei Sun","orcid":"https://orcid.org/0000-0001-8162-1949"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Sun","raw_affiliation_strings":["UT Austin"],"affiliations":[{"raw_affiliation_string":"UT Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004717608","display_name":"David Harwath","orcid":"https://orcid.org/0000-0003-0206-0253"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Harwath","raw_affiliation_strings":["UT Austin"],"affiliations":[{"raw_affiliation_string":"UT Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012765543","display_name":"Kristen Grauman","orcid":"https://orcid.org/0000-0002-9591-5873"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kristen Grauman","raw_affiliation_strings":["UT Austin","FAIR, Meta AI"],"affiliations":[{"raw_affiliation_string":"UT Austin","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"FAIR, Meta AI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5054262111"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":4.4294,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.95701722,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.8393888473510742},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7801311016082764},{"id":"https://openalex.org/keywords/monaural","display_name":"Monaural","score":0.7172384262084961},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7097043395042419},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4761788249015808},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4657386541366577},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4410708248615265},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.42914384603500366},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33387070894241333},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.25195547938346863},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08200523257255554},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07327485084533691}],"concepts":[{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.8393888473510742},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7801311016082764},{"id":"https://openalex.org/C102894143","wikidata":"https://www.wikidata.org/wiki/Q1323979","display_name":"Monaural","level":2,"score":0.7172384262084961},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7097043395042419},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4761788249015808},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4657386541366577},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4410708248615265},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.42914384603500366},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33387070894241333},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.25195547938346863},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08200523257255554},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07327485084533691},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1552314771","https://openalex.org/W1557831657","https://openalex.org/W1901129140","https://openalex.org/W2006129368","https://openalex.org/W2120847449","https://openalex.org/W2136682440","https://openalex.org/W2153418894","https://openalex.org/W2164502538","https://openalex.org/W2194775991","https://openalex.org/W2242685705","https://openalex.org/W2542605056","https://openalex.org/W2890820256","https://openalex.org/W2901243971","https://openalex.org/W2949558265","https://openalex.org/W2949830259","https://openalex.org/W2963040451","https://openalex.org/W2964339842","https://openalex.org/W3096159803","https://openalex.org/W3097777922","https://openalex.org/W3103930150","https://openalex.org/W3108332675","https://openalex.org/W3169721356","https://openalex.org/W3197912330","https://openalex.org/W4225738731","https://openalex.org/W4281739032","https://openalex.org/W4296927107","https://openalex.org/W4312779270","https://openalex.org/W6633301432","https://openalex.org/W6651792446","https://openalex.org/W6729468598","https://openalex.org/W6762114000","https://openalex.org/W6764040762","https://openalex.org/W6782288879","https://openalex.org/W6785656517","https://openalex.org/W6796723430","https://openalex.org/W6810700606","https://openalex.org/W6839307775"],"related_works":["https://openalex.org/W2036157531","https://openalex.org/W2056406069","https://openalex.org/W1518859147","https://openalex.org/W1974981856","https://openalex.org/W1983045063","https://openalex.org/W2045506488","https://openalex.org/W4321794819","https://openalex.org/W2944394647","https://openalex.org/W2621851636","https://openalex.org/W2401567014"],"abstract_inverted_index":{"Reverberation":[0],"not":[1],"only":[2],"degrades":[3],"the":[4,15,29,55,66,88],"quality":[5],"of":[6,17,63,74,97,112,118,123],"speech":[7,19,40,113,136,138],"for":[8,135],"human":[9,49],"perception,":[10],"but":[11],"also":[12],"severely":[13],"impacts":[14],"accuracy":[16],"automatic":[18],"recognition.":[20],"Prior":[21],"work":[22],"attempts":[23],"to":[24,36,38,82],"remove":[25,83],"reverberation":[26,68,84],"based":[27,85],"on":[28,86,129],"audio":[30],"modality":[31],"only.":[32],"Our":[33],"idea":[34],"is":[35],"learn":[37],"dereverberate":[39],"from":[41],"audio-visual":[42],"observations.":[43],"The":[44],"visual":[45,93],"environment":[46],"surrounding":[47],"a":[48,103,121],"speaker":[50,60,141],"reveals":[51],"important":[52],"cues":[53],"about":[54],"room":[56,124],"geometry,":[57],"materials,":[58],"and":[59,92,132,140,149],"location,":[61],"all":[62],"which":[64],"influence":[65],"precise":[67],"effects.":[69],"We":[70],"introduce":[71],"Visually-Informed":[72],"Dereverberation":[73],"Audio":[75],"(VIDA),":[76],"an":[77],"end-to-end":[78],"approach":[79,128],"that":[80,107],"learns":[81],"both":[87,130],"observed":[89],"monaural":[90],"sound":[91],"scene.":[94],"In":[95],"support":[96],"this":[98],"new":[99],"task,":[100],"we":[101,143],"develop":[102],"large-scale":[104],"dataset":[105],"SoundSpaces-Speech":[106],"uses":[108],"realistic":[109],"acoustic":[110],"renderings":[111],"in":[114],"real-world":[115],"3D":[116],"scans":[117],"homes":[119],"offering":[120],"variety":[122],"acoustics.":[125],"Demonstrating":[126],"our":[127],"simulated":[131],"real":[133],"imagery":[134],"enhancement,":[137],"recognition,":[139],"identification,":[142],"show":[144],"it":[145],"achieves":[146],"state-of-the-art":[147],"performance":[148],"substantially":[150],"improves":[151],"over":[152],"audio-only":[153],"methods.":[154]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":5}],"updated_date":"2026-03-12T06:13:28.667946","created_date":"2025-10-10T00:00:00"}
