{"id":"https://openalex.org/W4409225738","doi":"https://doi.org/10.1109/jstsp.2025.3558654","title":"SAV-SE: Scene-Aware Audio-Visual Speech Enhancement With Selective State Space Model","display_name":"SAV-SE: Scene-Aware Audio-Visual Speech Enhancement With Selective State Space Model","publication_year":2025,"publication_date":"2025-04-07","ids":{"openalex":"https://openalex.org/W4409225738","doi":"https://doi.org/10.1109/jstsp.2025.3558654"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2025.3558654","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2025.3558654","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056495776","display_name":"Xinyuan Qian","orcid":"https://orcid.org/0000-0002-9511-6713"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinyuan Qian","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011442848","display_name":"Jie Gao","orcid":"https://orcid.org/0000-0003-0400-9961"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaran Gao","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044256713","display_name":"Yaodan Zhang","orcid":"https://orcid.org/0009-0007-8811-3091"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaodan Zhang","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042675934","display_name":"Qiquan Zhang","orcid":"https://orcid.org/0000-0001-5089-6317"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Qiquan Zhang","raw_affiliation_strings":["School of Electrical Engineering and Telecommunications, The University of New South Wales, Sydney, NSW, Australia","School of Electrical Engineering and Telecommunications, The University of New South Wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Telecommunications, The University of New South Wales, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I31746571"]},{"raw_affiliation_string":"School of Electrical Engineering and Telecommunications, The University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091096053","display_name":"Hexin Liu","orcid":"https://orcid.org/0000-0002-3998-9229"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Hexin Liu","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059858850","display_name":"Leibny Paola Garcia","orcid":"https://orcid.org/0000-0002-7449-5726"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leibny Paola Garcia Perera","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD, USA","CLSP and HLT-COE, Johns Hopkins University, USA"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]},{"raw_affiliation_string":"CLSP and HLT-COE, Johns Hopkins University, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["Guangdong Provincial Key Laboratory of Big Data Computing, The Chinese University of Hong Kong (Shenzhen), Shenzhen, China","Guangdong Provincial Key Laboratory of Big Data Computing, The Chinese University of Hong Kong, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of Big Data Computing, The Chinese University of Hong Kong (Shenzhen), Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]},{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of Big Data Computing, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5056495776"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":null,"apc_paid":null,"fwci":4.7589,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94435774,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"19","issue":"4","first_page":"623","last_page":"634"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.949999988079071,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9254999756813049,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7755075693130493},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6234471797943115},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5065339803695679},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.48148930072784424},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.48128050565719604},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4717089831829071},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.435871958732605},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.18635305762290955},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.08103659749031067}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7755075693130493},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6234471797943115},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5065339803695679},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.48148930072784424},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.48128050565719604},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4717089831829071},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.435871958732605},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.18635305762290955},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.08103659749031067},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jstsp.2025.3558654","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2025.3558654","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.4000000059604645}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W48505526","https://openalex.org/W185399533","https://openalex.org/W1482149378","https://openalex.org/W1494198834","https://openalex.org/W2044893557","https://openalex.org/W2108524362","https://openalex.org/W2109215269","https://openalex.org/W2130178255","https://openalex.org/W2153894152","https://openalex.org/W2515728195","https://openalex.org/W2516001803","https://openalex.org/W2593116425","https://openalex.org/W2678916739","https://openalex.org/W2788241093","https://openalex.org/W2790428568","https://openalex.org/W2897371647","https://openalex.org/W2901279995","https://openalex.org/W2914067823","https://openalex.org/W2931364255","https://openalex.org/W2962788625","https://openalex.org/W2962866211","https://openalex.org/W2963082324","https://openalex.org/W2964171275","https://openalex.org/W2964207404","https://openalex.org/W2971417062","https://openalex.org/W2973231102","https://openalex.org/W2981851635","https://openalex.org/W2991361823","https://openalex.org/W3017350693","https://openalex.org/W3097777922","https://openalex.org/W3097945073","https://openalex.org/W3116242343","https://openalex.org/W3135222947","https://openalex.org/W3136499730","https://openalex.org/W3147539069","https://openalex.org/W3157866890","https://openalex.org/W3162475350","https://openalex.org/W3182657421","https://openalex.org/W3197729725","https://openalex.org/W4221162870","https://openalex.org/W4253928870","https://openalex.org/W4281663607","https://openalex.org/W4285258106","https://openalex.org/W4289665794","https://openalex.org/W4312271884","https://openalex.org/W4312367758","https://openalex.org/W4312779270","https://openalex.org/W4319585899","https://openalex.org/W4372260127","https://openalex.org/W4385807442","https://openalex.org/W4385823193","https://openalex.org/W4387934967","https://openalex.org/W4392902963","https://openalex.org/W4392904390","https://openalex.org/W4395471007","https://openalex.org/W4402112025","https://openalex.org/W4404788303","https://openalex.org/W4406461266","https://openalex.org/W4408354310","https://openalex.org/W4410087476","https://openalex.org/W6781751280","https://openalex.org/W6841484515","https://openalex.org/W6845577938","https://openalex.org/W6859298233","https://openalex.org/W6861342692","https://openalex.org/W6861387779"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W2393726419"],"abstract_inverted_index":{"Speech":[0,103],"enhancement":[1,137],"plays":[2],"an":[3],"essential":[4],"role":[5],"in":[6,44,92],"various":[7],"applications,":[8],"and":[9,34,83,154,169],"the":[10,23,30,51,61,78,86,112,128,135,142,152,173,176,192],"integration":[11],"of":[12,25,32,130,178],"visual":[13,58],"information":[14,119],"has":[15,77],"been":[16,65],"demonstrated":[17],"to":[18,81,115,126],"bring":[19],"substantial":[20],"advantages.":[21],"However,":[22],"majority":[24],"current":[26],"research":[27],"concentrates":[28],"on":[29,165],"examination":[31],"facial":[33],"lip":[35],"movements,":[36],"which":[37,132,150],"can":[38],"be":[39],"compromised":[40],"or":[41,49],"entirely":[42],"inaccessible":[43],"scenarios":[45],"where":[46,172],"occlusions":[47],"occur":[48],"when":[50,69],"camera":[52],"view":[53],"is":[54,111],"distant.":[55],"Whereas":[56],"contextual":[57,118],"cues":[59,125],"from":[60,120],"surrounding":[62],"environment":[63],"have":[64],"overlooked:":[66],"for":[67,157],"example,":[68],"we":[70,95,140],"see":[71],"a":[72,97],"dog":[73],"bark,":[74],"our":[75,107],"brain":[76],"innate":[79],"ability":[80],"discern":[82],"filter":[84],"out":[85],"barking":[87],"noise.":[88],"To":[89,106],"this":[90,93,110],"end,":[91],"paper,":[94],"introduce":[96],"novel":[98],"task,":[99],"i.e.":[100],"Scene-aware":[101],"Audio-Visual":[102],"Enhancement":[104],"(SAV-SE.":[105],"best":[108],"knowledge,":[109],"first":[113],"proposal":[114],"use":[116],"rich":[117],"synchronized":[121],"video":[122],"as":[123],"auxiliary":[124],"indicate":[127],"type":[129],"noise,":[131],"eventually":[133],"improves":[134],"speech":[136],"performance.":[138],"Specifically,":[139],"propose":[141],"VC-S":[143,179],"<inline-formula":[144,180],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[145,181,201],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[146,182],"notation=\"LaTeX\">$^{2}$</tex-math></inline-formula>":[147,183],"E":[148,184],"method,":[149],"incorporates":[151],"Conformer":[153],"Mamba":[155],"modules":[156],"their":[158],"complementary":[159],"strengths.":[160],"Extensive":[161],"experiments":[162],"are":[163],"conducted":[164],"public":[166],"MUSIC,":[167],"AVSpeech":[168],"AudioSet":[170],"datasets,":[171],"results":[174],"demonstrate":[175],"superiority":[177],"over":[185],"other":[186],"competitive":[187],"methods.":[188],"We":[189],"will":[190],"make":[191],"source":[193],"code":[194],"publicly":[195],"available.":[196],"Project":[197],"demo":[198],"page:":[199],"<uri":[200],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://AVSEPage.github.io/</uri>":[202]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
