{"id":"https://openalex.org/W4413120602","doi":"https://doi.org/10.1109/tcsvt.2025.3597097","title":"VisualRAG: Knowledge-Guided Retrieval Augmentation for Image-Text Matching","display_name":"VisualRAG: Knowledge-Guided Retrieval Augmentation for Image-Text Matching","publication_year":2025,"publication_date":"2025-08-08","ids":{"openalex":"https://openalex.org/W4413120602","doi":"https://doi.org/10.1109/tcsvt.2025.3597097"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3597097","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3597097","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084014445","display_name":"Hengchang Wang","orcid":"https://orcid.org/0000-0003-3097-1986"},"institutions":[{"id":"https://openalex.org/I28006308","display_name":"Shandong Normal University","ror":"https://ror.org/01wy3h363","country_code":"CN","type":"education","lineage":["https://openalex.org/I28006308"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hengchang Wang","raw_affiliation_strings":["School of Information Science and Engineering, Shandong Normal University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Shandong Normal University, Jinan, China","institution_ids":["https://openalex.org/I28006308"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418866","display_name":"Li Liu","orcid":"https://orcid.org/0000-0002-9121-5124"},"institutions":[{"id":"https://openalex.org/I28006308","display_name":"Shandong Normal University","ror":"https://ror.org/01wy3h363","country_code":"CN","type":"education","lineage":["https://openalex.org/I28006308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Liu","raw_affiliation_strings":["School of Information Science and Engineering, Shandong Normal University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Shandong Normal University, Jinan, China","institution_ids":["https://openalex.org/I28006308"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113755120","display_name":"Huaxiang Zhang","orcid":"https://orcid.org/0000-0001-6259-7533"},"institutions":[{"id":"https://openalex.org/I28006308","display_name":"Shandong Normal University","ror":"https://ror.org/01wy3h363","country_code":"CN","type":"education","lineage":["https://openalex.org/I28006308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaxiang Zhang","raw_affiliation_strings":["School of Information Science and Engineering, Shandong Normal University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Shandong Normal University, Jinan, China","institution_ids":["https://openalex.org/I28006308"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108048954","display_name":"Lei Zhu","orcid":"https://orcid.org/0000-0002-2993-7142"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Zhu","raw_affiliation_strings":["School of Electronic and Information Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034967388","display_name":"Xiaojun Chang","orcid":"https://orcid.org/0000-0002-7778-8807"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojun Chang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100764474","display_name":"Hao Du","orcid":"https://orcid.org/0009-0000-3599-8665"},"institutions":[{"id":"https://openalex.org/I28006308","display_name":"Shandong Normal University","ror":"https://ror.org/01wy3h363","country_code":"CN","type":"education","lineage":["https://openalex.org/I28006308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Du","raw_affiliation_strings":["School of Information Science and Engineering, Shandong Normal University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Shandong Normal University, Jinan, China","institution_ids":["https://openalex.org/I28006308"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5084014445"],"corresponding_institution_ids":["https://openalex.org/I28006308"],"apc_list":null,"apc_paid":null,"fwci":1.3621,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8381215,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"36","issue":"1","first_page":"1234","last_page":"1248"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7221127152442932},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.5896321535110474},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5715755820274353},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5138959884643555},{"id":"https://openalex.org/keywords/image-matching","display_name":"Image matching","score":0.474568635225296},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.42947399616241455},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.42758113145828247},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4136180281639099},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.38724827766418457},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15502649545669556}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7221127152442932},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.5896321535110474},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5715755820274353},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5138959884643555},{"id":"https://openalex.org/C2986492983","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image matching","level":3,"score":0.474568635225296},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.42947399616241455},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.42758113145828247},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4136180281639099},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38724827766418457},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15502649545669556},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3597097","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3597097","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8631945552","display_name":null,"funder_award_id":"No. 62076153","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8633247868","display_name":null,"funder_award_id":"No. 62176144","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W2795832645","https://openalex.org/W2962964995","https://openalex.org/W2987671777","https://openalex.org/W3035212740","https://openalex.org/W3035552787","https://openalex.org/W3035588244","https://openalex.org/W3043826557","https://openalex.org/W3091588028","https://openalex.org/W3118694826","https://openalex.org/W3175888430","https://openalex.org/W3213942508","https://openalex.org/W4210894218","https://openalex.org/W4214587494","https://openalex.org/W4224952037","https://openalex.org/W4312913651","https://openalex.org/W4313178921","https://openalex.org/W4313181088","https://openalex.org/W4317436342","https://openalex.org/W4323338501","https://openalex.org/W4360897523","https://openalex.org/W4378697123","https://openalex.org/W4381786155","https://openalex.org/W4382202923","https://openalex.org/W4386065291","https://openalex.org/W4386065353","https://openalex.org/W4386071498","https://openalex.org/W4386071757","https://openalex.org/W4386072185","https://openalex.org/W4386072365","https://openalex.org/W4386160421","https://openalex.org/W4386234130","https://openalex.org/W4387951800","https://openalex.org/W4387967913","https://openalex.org/W4387968136","https://openalex.org/W4389170676","https://openalex.org/W4389352609","https://openalex.org/W4390603585","https://openalex.org/W4391216079","https://openalex.org/W4391744111","https://openalex.org/W4392152051","https://openalex.org/W4392678259","https://openalex.org/W4393154020","https://openalex.org/W4394865305","https://openalex.org/W4395447490","https://openalex.org/W4396712842","https://openalex.org/W4402727527","https://openalex.org/W4402727895","https://openalex.org/W4403420955","https://openalex.org/W4403439692","https://openalex.org/W4403780697","https://openalex.org/W4404873588","https://openalex.org/W4409366257"],"related_works":["https://openalex.org/W2384918310","https://openalex.org/W2383808867","https://openalex.org/W2372581239","https://openalex.org/W2107893065","https://openalex.org/W2617958085","https://openalex.org/W1509862229","https://openalex.org/W1974208548","https://openalex.org/W2050706403","https://openalex.org/W1973922169","https://openalex.org/W1519745258"],"abstract_inverted_index":{"Image-text":[0],"matching":[1],"as":[2],"a":[3,27,74],"fundamental":[4],"cross-modal":[5,140,180],"understanding":[6],"task":[7],"presents":[8],"unique":[9],"challenges":[10],"in":[11,65,114,192],"weakly-aligned":[12],"scenarios.":[13],"Such":[14],"data":[15,42],"typically":[16],"feature":[17,53,141,176],"highly":[18],"abstract":[19],"textual":[20],"captions":[21],"with":[22,31,59,125],"sparse":[23],"entity":[24,127],"references,":[25],"creating":[26],"significant":[28],"semantic":[29,167],"gap":[30],"visual":[32],"content.":[33],"Current":[34],"mainstream":[35],"methods,":[36],"primarily":[37],"designed":[38],"for":[39],"strongly":[40],"aligned":[41,67],"pairs,":[43],"employ":[44],"dynamic":[45],"modeling":[46],"or":[47],"multi-dimensional":[48],"similarity":[49,147],"computation":[50],"to":[51,149],"achieve":[52],"space":[54],"mapping.":[55],"However,":[56],"they":[57],"struggle":[58],"information":[60,104],"asymmetry":[61],"and":[62,98,159,169,199],"modal":[63],"heterogeneity":[64],"weakly":[66],"cases.":[68],"To":[69],"address":[70],"this,":[71],"we":[72],"propose":[73],"Visual":[75],"Perception":[76],"Knowledge":[77],"Enhancement":[78],"(VPKE)":[79],"framework.":[80],"Unlike":[81],"existing":[82],"methods":[83],"based":[84],"on":[85,196],"strong":[86],"alignment":[87],"assumptions,":[88],"this":[89],"framework":[90,144],"mines":[91],"latent":[92],"image":[93],"semantics":[94],"through":[95],"vision-language":[96],"models":[97],"generates":[99],"auxiliary":[100,160],"captions,":[101,161],"overcoming":[102],"the":[103,187,197],"bottleneck":[105],"of":[106,165],"traditional":[107],"text":[108],"modalities.":[109],"Its":[110],"core":[111],"innovation":[112],"lies":[113],"an":[115],"adaptive":[116,163],"knowledge":[117,137],"distillation":[118],"mechanism":[119,130],"that":[120,186],"combines":[121],"retrieval-augmented":[122],"generation":[123],"(RAG)":[124],"key":[126,157],"extraction.":[128],"This":[129],"effectively":[131],"filters":[132],"noise":[133],"when":[134],"introducing":[135],"external":[136],"while":[138],"optimizing":[139],"integration.":[142],"The":[143],"employs":[145],"multi-level":[146],"evaluation":[148],"dynamically":[150],"adjust":[151],"fusion":[152],"weights":[153],"among":[154],"original":[155],"text,":[156],"entities,":[158],"enabling":[162],"integration":[164],"diverse":[166],"features":[168],"significantly":[170],"improving":[171],"model":[172],"flexibility.":[173],"Additionally,":[174],"multi-scale":[175],"extraction":[177],"further":[178],"enhances":[179],"representation":[181],"capabilities.":[182],"Experimental":[183],"results":[184],"show":[185],"proposed":[188],"method":[189],"performs":[190],"excellently":[191],"image-text":[193],"retrieval":[194],"tasks":[195],"MSCOCO":[198],"Flickr30K":[200],"datasets,":[201],"validating":[202],"its":[203],"effectiveness.":[204]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-16T09:10:04.655348","created_date":"2025-10-10T00:00:00"}
