{"id":"https://openalex.org/W4415538364","doi":"https://doi.org/10.1145/3746027.3762038","title":"EVENT-Retriever: Event-Aware Multimodal Image Retrieval for Realistic Captions","display_name":"EVENT-Retriever: Event-Aware Multimodal Image Retrieval for Realistic Captions","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415538364","doi":"https://doi.org/10.1145/3746027.3762038"},"language":"en","primary_location":{"id":"doi:10.1145/3746027.3762038","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3762038","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2509.00751","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002520573","display_name":"Dinh-Khoi Vo","orcid":"https://orcid.org/0000-0001-8831-8846"},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]},{"id":"https://openalex.org/I23582244","display_name":"Ho Chi Minh City University of Science","ror":"https://ror.org/05jfbgm49","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I23582244"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Dinh-Khoi Vo","raw_affiliation_strings":["University of Science, VNU-HCM, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"University of Science, VNU-HCM, Ho Chi Minh, Vietnam","institution_ids":["https://openalex.org/I23582244","https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101826118","display_name":"Van-Loc Nguyen","orcid":"https://orcid.org/0000-0001-9351-3750"},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]},{"id":"https://openalex.org/I23582244","display_name":"Ho Chi Minh City University of Science","ror":"https://ror.org/05jfbgm49","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I23582244"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Van-Loc Nguyen","raw_affiliation_strings":["University of Science, VNU-HCM, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"University of Science, VNU-HCM, Ho Chi Minh, Vietnam","institution_ids":["https://openalex.org/I23582244","https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053495766","display_name":"Minh\u2013Triet Tran","orcid":"https://orcid.org/0000-0003-3046-3041"},"institutions":[{"id":"https://openalex.org/I23582244","display_name":"Ho Chi Minh City University of Science","ror":"https://ror.org/05jfbgm49","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I23582244"]},{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Minh-Triet Tran","raw_affiliation_strings":["University of Science, VNU-HCM, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"University of Science, VNU-HCM, Ho Chi Minh, Vietnam","institution_ids":["https://openalex.org/I23582244","https://openalex.org/I123565023"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062359795","display_name":"Trung-Nghia Le","orcid":"https://orcid.org/0000-0002-7363-2610"},"institutions":[{"id":"https://openalex.org/I23582244","display_name":"Ho Chi Minh City University of Science","ror":"https://ror.org/05jfbgm49","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I23582244"]},{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Trung-Nghia Le","raw_affiliation_strings":["University of Science, VNU-HCM, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"University of Science, VNU-HCM, Ho Chi Minh, Vietnam","institution_ids":["https://openalex.org/I23582244","https://openalex.org/I123565023"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5002520573"],"corresponding_institution_ids":["https://openalex.org/I123565023","https://openalex.org/I23582244"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32416291,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"14257","last_page":"14263"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.7476999759674072},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5192000269889832},{"id":"https://openalex.org/keywords/mean-reciprocal-rank","display_name":"Mean reciprocal rank","score":0.47450000047683716},{"id":"https://openalex.org/keywords/visual-word","display_name":"Visual Word","score":0.4602999985218048},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.4178999960422516},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4124999940395355},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.4059999883174896},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4059999883174896},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.3813999891281128}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7871999740600586},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.7476999759674072},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5885999798774719},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5192000269889832},{"id":"https://openalex.org/C44083865","wikidata":"https://www.wikidata.org/wiki/Q3853443","display_name":"Mean reciprocal rank","level":2,"score":0.47450000047683716},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.4602999985218048},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4180000126361847},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.4178999960422516},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4124999940395355},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.4059999883174896},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4059999883174896},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.3813999891281128},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3671000003814697},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.36489999294281006},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3465999960899353},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.32850000262260437},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3089999854564667},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.30889999866485596},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.2989000082015991},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2924000024795532},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2921999990940094},{"id":"https://openalex.org/C180462255","wikidata":"https://www.wikidata.org/wiki/Q3559736","display_name":"Standard test image","level":4,"score":0.29089999198913574},{"id":"https://openalex.org/C2986492983","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image matching","level":3,"score":0.28119999170303345},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2711000144481659},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.26510000228881836},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.2565999925136566}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3746027.3762038","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3762038","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2509.00751","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.00751","pdf_url":"https://arxiv.org/pdf/2509.00751","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.00751","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.00751","pdf_url":"https://arxiv.org/pdf/2509.00751","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W2132201434","https://openalex.org/W2148972377","https://openalex.org/W3090449556","https://openalex.org/W3119589206","https://openalex.org/W3203247393","https://openalex.org/W4312407537","https://openalex.org/W4323042875","https://openalex.org/W4390873312","https://openalex.org/W4405595839"],"related_works":[],"abstract_inverted_index":{"Event-based":[0],"image":[1,67,91,143],"retrieval":[2,28,55,139],"from":[3,102],"free-form":[4],"captions":[5,34],"presents":[6],"a":[7,53],"significant":[8],"challenge:":[9],"models":[10],"must":[11],"understand":[12],"not":[13],"only":[14],"visual":[15],"features":[16],"but":[17],"also":[18],"latent":[19],"event":[20],"semantics,":[21],"context,":[22,41],"and":[23,65,74,87,97,137],"real-world":[24,142],"knowledge.":[25],"Conventional":[26],"vision-language":[27],"approaches":[29],"often":[30],"fall":[31],"short":[32],"when":[33],"describe":[35],"abstract":[36],"events,":[37],"implicit":[38],"causality,":[39],"temporal":[40],"or":[42],"contain":[43],"long,":[44],"complex":[45],"narratives.":[46],"To":[47,93],"tackle":[48],"these":[49],"issues,":[50],"we":[51,99],"introduce":[52],"multi-stage":[54],"framework":[56],"combining":[57,134],"dense":[58],"article":[59,81],"retrieval,":[60],"event-aware":[61],"language":[62],"model":[63],"reranking,":[64],"efficient":[66],"collection,":[68],"followed":[69],"by":[70],"caption-guided":[71],"semantic":[72],"matching":[73],"rank-aware":[75],"selection.":[76],"We":[77],"leverage":[78],"Qwen3":[79],"for":[80,84,89,140],"search,":[82],"Qwen3-Reranker":[83],"contextual":[85],"alignment,":[86],"Qwen2-VL":[88],"precise":[90],"scoring.":[92],"further":[94],"enhance":[95],"performance":[96],"robustness,":[98],"fuse":[100],"outputs":[101],"multiple":[103],"configurations":[104],"using":[105],"Reciprocal":[106],"Rank":[107],"Fusion":[108],"(RRF).":[109],"Our":[110],"system":[111],"achieves":[112],"the":[113,117,125,131],"top-1":[114],"score":[115],"on":[116],"private":[118],"test":[119],"set":[120],"of":[121,133],"Track":[122],"2":[123],"in":[124],"EVENTA":[126],"2025":[127],"Grand":[128],"Challenge,":[129],"demonstrating":[130],"effectiveness":[132],"language-based":[135],"reasoning":[136],"multimodal":[138],"complex,":[141],"understanding.":[144],"The":[145],"code":[146],"is":[147],"available":[148],"at":[149],"https://github.com/vdkhoi20/EVENT-Retriever.":[150]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-25T00:00:00"}
