{"id":"https://openalex.org/W4416965999","doi":"https://doi.org/10.1109/tmm.2025.3640018","title":"Modality-Aware Gated Attention Network for Audio-Visual Event Localization","display_name":"Modality-Aware Gated Attention Network for Audio-Visual Event Localization","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W4416965999","doi":"https://doi.org/10.1109/tmm.2025.3640018"},"language":null,"primary_location":{"id":"doi:10.1109/tmm.2025.3640018","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3640018","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100733028","display_name":"Liang Liu","orcid":"https://orcid.org/0000-0002-0112-1264"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liang Liu","raw_affiliation_strings":["School of Computer Science and Technology, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021012603","display_name":"Shuaiyong Li","orcid":"https://orcid.org/0000-0002-3914-5173"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuaiyong Li","raw_affiliation_strings":["School of Automation, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001825352","display_name":"Yongqiang Zhu","orcid":"https://orcid.org/0000-0002-2215-8699"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongqiang Zhu","raw_affiliation_strings":["School of Computer Science and Technology, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075922276","display_name":"Zhengxu Dai","orcid":"https://orcid.org/0000-0001-8641-1019"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengxu Dai","raw_affiliation_strings":["School of Automation, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100733028"],"corresponding_institution_ids":["https://openalex.org/I10535382"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.48706942,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"1601","last_page":"1612"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.529699981212616,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.529699981212616,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.3953999876976013,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.0142000000923872,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6298999786376953},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6133000254631042},{"id":"https://openalex.org/keywords/independence","display_name":"Independence (probability theory)","score":0.5486000180244446},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.49729999899864197},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.42570000886917114},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4108000099658966},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.399399995803833},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3880000114440918}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8511999845504761},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6413999795913696},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6298999786376953},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6133000254631042},{"id":"https://openalex.org/C35651441","wikidata":"https://www.wikidata.org/wiki/Q625303","display_name":"Independence (probability theory)","level":2,"score":0.5486000180244446},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.49729999899864197},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.42570000886917114},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4108000099658966},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.399399995803833},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3880000114440918},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37229999899864197},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3702000081539154},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.3686999976634979},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.3605000078678131},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3368000090122223},{"id":"https://openalex.org/C2986089797","wikidata":"https://www.wikidata.org/wiki/Q6501338","display_name":"Visual attention","level":3,"score":0.31610000133514404},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30149999260902405},{"id":"https://openalex.org/C2776289891","wikidata":"https://www.wikidata.org/wiki/Q1931511","display_name":"Neglect","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.27489998936653137},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.26750001311302185}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3640018","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3640018","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W432419735","https://openalex.org/W2032337854","https://openalex.org/W2065274193","https://openalex.org/W2105582566","https://openalex.org/W2194775991","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2618530766","https://openalex.org/W2619697695","https://openalex.org/W2884293275","https://openalex.org/W2931433835","https://openalex.org/W2962865004","https://openalex.org/W2964109005","https://openalex.org/W2982619606","https://openalex.org/W2986131686","https://openalex.org/W2990113535","https://openalex.org/W2997909293","https://openalex.org/W3015925607","https://openalex.org/W3021321555","https://openalex.org/W3034742263","https://openalex.org/W3093287838","https://openalex.org/W3110268547","https://openalex.org/W3110606395","https://openalex.org/W3118120400","https://openalex.org/W3133481345","https://openalex.org/W3154807520","https://openalex.org/W3175300676","https://openalex.org/W3175514052","https://openalex.org/W3176445421","https://openalex.org/W3214311327","https://openalex.org/W4200436929","https://openalex.org/W4210416950","https://openalex.org/W4211154280","https://openalex.org/W4226025707","https://openalex.org/W4226206782","https://openalex.org/W4226314236","https://openalex.org/W4280492076","https://openalex.org/W4289752563","https://openalex.org/W4312383651","https://openalex.org/W4312415534","https://openalex.org/W4313123347","https://openalex.org/W4367146821","https://openalex.org/W4372260310","https://openalex.org/W4386066074","https://openalex.org/W4386072368","https://openalex.org/W4386113246","https://openalex.org/W4386523246","https://openalex.org/W4387682108","https://openalex.org/W4387968342","https://openalex.org/W4390872419","https://openalex.org/W4391594029","https://openalex.org/W4393147243","https://openalex.org/W4399146361","https://openalex.org/W4399426547","https://openalex.org/W4400157618","https://openalex.org/W4402660094","https://openalex.org/W4402727889","https://openalex.org/W4405717843","https://openalex.org/W7133238718","https://openalex.org/W7133244573","https://openalex.org/W7133245111"],"related_works":[],"abstract_inverted_index":{"Audio-visual":[0],"event":[1,18],"localization":[2,128,133],"(AVEL)":[3],"refers":[4],"to":[5,45,54,164],"the":[6,9,12,30,70,122,125,131,180,184],"identification":[7],"of":[8,16,188],"category":[10],"and":[11,23,39,106,130,142,158,173,186,197],"corresponding":[13],"temporal":[14],"boundaries":[15],"an":[17],"that":[19,96,152],"is":[20,118,162],"both":[21,154,195],"visually":[22],"audibly":[24],"discernible":[25],"in":[26,194],"unconstrained":[27],"videos.":[28],"However,":[29],"event-irrelevant":[31],"background":[32,56],"(e.g.,":[33],"ambient":[34],"noise":[35],"or":[36,66],"visual":[37,100],"occlusion)":[38],"event-specific":[40,108,166],"modal":[41],"biases":[42,168],"often":[43],"lead":[44],"audio-visual":[46],"semantic":[47],"inconsistency.":[48],"Existing":[49],"methods":[50],"utilize":[51],"modality-guided":[52,132],"attention":[53,62,141],"suppress":[55],"interference,":[57],"but":[58],"they":[59],"neglect":[60],"this":[61,75],"inevitably":[63],"introduces":[64],"redundant":[65],"irrelevant":[67],"information":[68],"from":[69],"other":[71],"modality.":[72],"To":[73],"alleviate":[74],"problem,":[76],"we":[77],"propose":[78],"a":[79,112,146],"novel":[80],"<underline":[81,86,89,92],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[82,84,87,90,93],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">M</u>odality-<underline":[83],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">A</u>ware":[85],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">G</u>ated":[88],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">A</u>ttention":[91],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">N</u>etwork":[94],"(MAGAN)":[95],"focuses":[97],"on":[98,179],"event-relevant":[99],"regions,":[101],"consolidates":[102],"informative":[103],"audio":[104],"frequencies,":[105],"captures":[107],"modality":[109,167],"biases.":[110],"Specifically,":[111],"cross-modal":[113,147],"gated":[114,137,148,156,160],"co-attention":[115],"(CMGCA)":[116],"scheme":[117],"presented":[119],"for":[120],"modeling":[121],"correspondence":[123],"between":[124],"potential":[126],"(self-guided)":[127],"maps":[129,134],"through":[135],"two":[136],"components,":[138],"i.e.,":[139],"audio-to-visual":[140],"visual-to-audio":[143],"attention.":[144],"Furthermore,":[145],"co-interaction":[149],"(CMGCI)":[150],"mechanism":[151],"incorporates":[153],"unimodal":[155,171],"interaction":[157,161],"multimodal":[159,174],"introduced":[163],"capture":[165],"by":[169],"considering":[170],"independence":[172],"synergy":[175],"simultaneously.":[176],"Extensive":[177],"experiments":[178],"AVE":[181,199],"dataset":[182],"demonstrate":[183],"superiority":[185],"effectiveness":[187],"our":[189],"model":[190],"over":[191],"state-of-the-art":[192],"approaches":[193],"fully-":[196],"weakly-supervised":[198],"settings.":[200]},"counts_by_year":[],"updated_date":"2026-03-09T07:00:12.390032","created_date":"2025-12-03T00:00:00"}
