{"id":"https://openalex.org/W7151043745","doi":"https://doi.org/10.1109/access.2026.3680911","title":"Cap4Bridge: Caption-Guided Cross-Modal Contextualization With Stochastic Augmentation for Text-Video Retrieval","display_name":"Cap4Bridge: Caption-Guided Cross-Modal Contextualization With Stochastic Augmentation for Text-Video Retrieval","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7151043745","doi":"https://doi.org/10.1109/access.2026.3680911"},"language":"en","primary_location":{"id":"doi:10.1109/access.2026.3680911","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3680911","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3680911","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108943847","display_name":"MinJu Jeon","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Minju Jeon","raw_affiliation_strings":["Department of Data Science, Hanyang University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0009-0007-9577-7400","affiliations":[{"raw_affiliation_string":"Department of Data Science, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088829266","display_name":"HyunGee Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyungee Kim","raw_affiliation_strings":["Department of Data Science, Hanyang University, Seoul, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Data Science, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133020642","display_name":"Si-Woo Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Si-Woo Kim","raw_affiliation_strings":["Department of Data Science, Hanyang University, Seoul, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Data Science, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133057378","display_name":"Youngtaek Oh","orcid":null},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Youngtaek Oh","raw_affiliation_strings":["Department of Electrical Engineering, Korea Advanced Institute of Science and Technology, Daejeon, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Korea Advanced Institute of Science and Technology, Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133015580","display_name":"Soeun Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I11265461","display_name":"Korea Development Institute","ror":"https://ror.org/02h7vzs32","country_code":"KR","type":"facility","lineage":["https://openalex.org/I11265461","https://openalex.org/I4210097958"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Soeun Lee","raw_affiliation_strings":["AI Research and Development Division, CJ Group, Seoul, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AI Research and Development Division, CJ Group, Seoul, South Korea","institution_ids":["https://openalex.org/I11265461"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126323921","display_name":"Dong-Jin Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Dong-Jin Kim","raw_affiliation_strings":["Department of Data Science, Hanyang University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0001-7231-7494","affiliations":[{"raw_affiliation_string":"Department of Data Science, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5108943847"],"corresponding_institution_ids":["https://openalex.org/I4575257"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.6837103,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"54442","last_page":"54453"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9571999907493591,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9571999907493591,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.029600000008940697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.00430000014603138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/contextualization","display_name":"Contextualization","score":0.7210999727249146},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.26109999418258667},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.24330000579357147},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.22930000722408295}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7817000150680542},{"id":"https://openalex.org/C2780712339","wikidata":"https://www.wikidata.org/wiki/Q5165204","display_name":"Contextualization","level":3,"score":0.7210999727249146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49219998717308044},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4708999991416931},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2685000002384186},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.26109999418258667},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.24330000579357147},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.22930000722408295},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2160000056028366},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.19900000095367432}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2026.3680911","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3680911","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:e1f34998585a4b748b2f3bfbe12d4553","is_oa":true,"landing_page_url":"https://doaj.org/article/e1f34998585a4b748b2f3bfbe12d4553","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 14, Pp 54442-54453 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3680911","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3680911","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"key":[1,89],"challenge":[2],"in":[3,175],"text-video":[4],"retrieval":[5],"is":[6],"bridging":[7],"the":[8,57,124,130,137,151,156],"semantic":[9,126,152],"gap":[10],"between":[11],"information-rich":[12],"videos":[13],"and":[14,105,111,148,172],"concise":[15],"text":[16,53,106],"queries.":[17],"Existing":[18],"methods":[19,49],"often":[20],"address":[21],"this":[22],"by":[23,121,141],"incorporating":[24],"auxiliary":[25],"captions":[26,42],"from":[27,79],"Large":[28],"Language":[29],"Models":[30],"(LLMs)":[31],"or":[32],"employing":[33],"stochastic":[34,48],"modeling.":[35],"However,":[36],"these":[37,63,109],"approaches":[38],"face":[39],"critical":[40],"challenges:":[41],"can":[43],"lack":[44],"domain-specific":[45,81],"relevance,":[46],"while":[47,154],"that":[50,72],"directly":[51],"model":[52],"embeddings":[54],"risk":[55],"distorting":[56],"original":[58,157],"query\u2019s":[59,158],"intent.":[60,159],"To":[61],"overcome":[62],"issues,":[64],"we":[65],"propose":[66],"<italic":[67,74],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[68,75],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Cap4Bridge</i>,":[69],"a":[70,80,97],"framework":[71,86],"leverages":[73],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">semantic":[76],"anchors</i>":[77],"searched":[78,125],"caption":[82],"anchor":[83],"bank.":[84],"Our":[85,160],"introduces":[87],"two":[88],"components:":[90],"(1)":[91],"Caption-Guided":[92],"Cross-Modality":[93],"Contextualization,":[94],"which":[95,116],"uses":[96],"shared":[98],"co-attention":[99],"mechanism":[100],"to":[101,123,145],"enrich":[102],"both":[103,146],"video":[104],"representations":[107],"with":[108],"anchors,":[110],"(2)":[112],"Similarity-Aware":[113],"Stochastic":[114],"Augmentation,":[115],"applies":[117],"Gaussian":[118],"noise":[119],"scaled":[120],"relevance":[122],"anchors":[127],"rather":[128],"than":[129],"query":[131],"itself.":[132],"This":[133],"integrated":[134],"strategy":[135],"bridges":[136],"fundamental":[138],"information":[139],"imbalance":[140],"providing":[142],"complementary":[143],"context":[144],"modalities":[147],"robustly":[149],"expanding":[150],"representation":[153],"preserving":[155],"method":[161],"achieves":[162],"state-of-the-art":[163],"performance":[164],"across":[165],"most":[166],"benchmarks,":[167],"demonstrating":[168],"its":[169],"high":[170],"efficacy":[171],"generalizability,":[173],"particularly":[174],"challenging":[176],"cross-domain":[177],"settings.":[178]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2026-04-07T00:00:00"}
