{"id":"https://openalex.org/W4415708400","doi":"https://doi.org/10.1109/icme59968.2025.11209557","title":"Cross-Modal Semantic-Aware Network for Audio-Visual Event Localization","display_name":"Cross-Modal Semantic-Aware Network for Audio-Visual Event Localization","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708400","doi":"https://doi.org/10.1109/icme59968.2025.11209557"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209557","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209557","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100322388","display_name":"Liang Liu","orcid":"https://orcid.org/0000-0002-9407-7687"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liang Liu","raw_affiliation_strings":["Chongqing University of Posts and Telecommunications,School of Computer Science and Technology,P. R. China,400065"],"affiliations":[{"raw_affiliation_string":"Chongqing University of Posts and Telecommunications,School of Computer Science and Technology,P. R. China,400065","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021012603","display_name":"Shuaiyong Li","orcid":"https://orcid.org/0000-0002-3914-5173"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuaiyong Li","raw_affiliation_strings":["Chongqing University of Posts and Telecommunications,School of Automation,P. R. China,400065"],"affiliations":[{"raw_affiliation_string":"Chongqing University of Posts and Telecommunications,School of Automation,P. R. China,400065","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001825352","display_name":"Yongqiang Zhu","orcid":"https://orcid.org/0000-0002-2215-8699"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongqiang Zhu","raw_affiliation_strings":["Chongqing University of Posts and Telecommunications,School of Computer Science and Technology,P. R. China,400065"],"affiliations":[{"raw_affiliation_string":"Chongqing University of Posts and Telecommunications,School of Computer Science and Technology,P. R. China,400065","institution_ids":["https://openalex.org/I10535382"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100322388"],"corresponding_institution_ids":["https://openalex.org/I10535382"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39253248,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.32120001316070557,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.32120001316070557,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.226500004529953,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.09000000357627869,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.7835000157356262},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.670799970626831},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5133000016212463},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5011000037193298},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.4814999997615814},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.4474000036716461},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3968999981880188},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.36390000581741333}],"concepts":[{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.7835000157356262},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7815999984741211},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.670799970626831},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5422999858856201},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5133000016212463},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5011000037193298},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.4474000036716461},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41589999198913574},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3968999981880188},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C2776544517","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Unexpected events","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C198942812","wikidata":"https://www.wikidata.org/wiki/Q496618","display_name":"Semantic property","level":2,"score":0.3181999921798706},{"id":"https://openalex.org/C85407183","wikidata":"https://www.wikidata.org/wiki/Q1045785","display_name":"Semantic network","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2621000111103058},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.2565000057220459}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209557","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209557","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321135","display_name":"Chongqing University","ror":"https://ror.org/023rhb549"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2032337854","https://openalex.org/W2593116425","https://openalex.org/W2931433835","https://openalex.org/W2964109005","https://openalex.org/W2997909293","https://openalex.org/W3015925607","https://openalex.org/W3093287838","https://openalex.org/W3110606395","https://openalex.org/W3175514052","https://openalex.org/W3176445421","https://openalex.org/W3214311327","https://openalex.org/W4211154280","https://openalex.org/W4226025707","https://openalex.org/W4226206782","https://openalex.org/W4312383651","https://openalex.org/W4312415534","https://openalex.org/W4367146821","https://openalex.org/W4372260310","https://openalex.org/W4385245566","https://openalex.org/W4386113246","https://openalex.org/W4387682108","https://openalex.org/W4387968342","https://openalex.org/W4399426547","https://openalex.org/W4399474077","https://openalex.org/W4403942708"],"related_works":[],"abstract_inverted_index":{"Audio-Visual":[0],"event":[1,8,17],"localization":[2],"(AVEL)":[3],"aims":[4],"to":[5,36,51,106,116,140,142],"identify":[6],"the":[7,16,42,49,58,118,152,156,161],"category":[9],"and":[10,21,74,77,100,124,137,146,158],"temporal":[11,76,123],"boundaries":[12],"in":[13],"videos,":[14],"where":[15],"is":[18],"simultaneously":[19],"audible":[20],"visible.":[22],"Existing":[23],"methods":[24],"primarily":[25],"rely":[26],"on":[27,38,53,151],"modality-guided":[28],"attention":[29],"mechanisms":[30],"(e.g.,":[31],"audio-":[32],"or":[33],"visual-guided":[34],"attention)":[35],"focus":[37,52],"event-relevant":[39,109],"content.":[40,55],"However,":[41],"lack":[43],"of":[44,160],"semantic":[45,93,98,104,120],"guidance":[46],"inevitably":[47],"leads":[48],"model":[50],"event-irrelevant":[54],"To":[56],"address":[57],"above":[59],"bottleneck,":[60],"we":[61,83,127],"propose":[62],"a":[63,85,96,129],"novel":[64],"Cross-Modal":[65],"Semantic-Aware":[66],"Network":[67],"(CMSAN)":[68],"that":[69,91],"selects":[70],"semantically":[71],"consistent":[72],"segments":[73,110],"explores":[75],"channel":[78,125],"interactions":[79],"across":[80,122],"modalities.":[81],"Specifically,":[82],"devise":[84],"class-aware":[86],"segment":[87],"selection":[88],"(CASS)":[89],"module":[90],"embeds":[92],"classes":[94],"into":[95],"shared":[97],"space":[99],"then":[101],"leverages":[102],"segment-level":[103],"similarities":[105],"effectively":[107],"distinguish":[108],"while":[111],"compromising":[112],"irrelevant":[113],"ones.":[114],"Furthermore,":[115],"capture":[117],"cross-modal":[119],"cues":[121],"dimensions,":[126],"introduce":[128],"modality-aware":[130],"co-attention":[131],"(MACA)":[132],"module,":[133],"which":[134],"features":[135],"symmetrical":[136],"weight-sharing":[138],"components":[139],"attend":[141],"informative":[143],"visual":[144],"contexts":[145],"audio":[147],"frequencies.":[148],"Extensive":[149],"experiments":[150],"AVE":[153],"dataset":[154],"demonstrate":[155],"superiority":[157],"effectiveness":[159],"proposed":[162],"model.":[163]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-30T00:00:00"}
