{"id":"https://openalex.org/W7124435788","doi":"https://doi.org/10.1109/tip.2025.3649356","title":"Vision Enhancing LLMs: Empowering Multimodal Knowledge Storage and Sharing in LLMs","display_name":"Vision Enhancing LLMs: Empowering Multimodal Knowledge Storage and Sharing in LLMs","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7124435788","doi":"https://doi.org/10.1109/tip.2025.3649356","pmid":"https://pubmed.ncbi.nlm.nih.gov/41543962"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2025.3649356","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3649356","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123212131","display_name":"Yunxin Li","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yunxin Li","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123243755","display_name":"Zhenyu Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenyu Liu","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Baotian Hu","orcid":"https://orcid.org/0009-0000-0640-6990"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baotian Hu","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123216134","display_name":"Wei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Wang","raw_affiliation_strings":["School of Cyber Science and Technology, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Technology, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123243357","display_name":"Yuxin Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Ding","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiaochun Cao","orcid":"https://orcid.org/0000-0001-7141-708X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaochun Cao","raw_affiliation_strings":["School of Cyber Science and Technology, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Technology, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123229893","display_name":"Min Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5123212131"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13040144,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"35","issue":null,"first_page":"858","last_page":"871"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9781000018119812,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9781000018119812,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.002099999925121665,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.482699990272522},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.374099999666214},{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.3422999978065491},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.30329999327659607},{"id":"https://openalex.org/keywords/neglect","display_name":"Neglect","score":0.2874000072479248},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.28540000319480896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5781000256538391},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.482699990272522},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3828999996185303},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.374099999666214},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.35030001401901245},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.3422999978065491},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33649998903274536},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.29490000009536743},{"id":"https://openalex.org/C2776289891","wikidata":"https://www.wikidata.org/wiki/Q1931511","display_name":"Neglect","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C2776854237","wikidata":"https://www.wikidata.org/wiki/Q6031064","display_name":"Information sharing","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2700999975204468},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.2676999866962433}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2025.3649356","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3649356","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:41543962","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41543962","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1933349210","https://openalex.org/W2003170434","https://openalex.org/W2890894339","https://openalex.org/W2946609015","https://openalex.org/W2947312908","https://openalex.org/W2949579048","https://openalex.org/W2963096510","https://openalex.org/W2970062726","https://openalex.org/W2979382951","https://openalex.org/W2981952041","https://openalex.org/W2988326850","https://openalex.org/W2998617917","https://openalex.org/W3004268082","https://openalex.org/W3015063797","https://openalex.org/W3091588028","https://openalex.org/W3120043490","https://openalex.org/W3134329288","https://openalex.org/W3159959439","https://openalex.org/W3168900788","https://openalex.org/W3174770825","https://openalex.org/W3175270222","https://openalex.org/W3176641147","https://openalex.org/W3176824248","https://openalex.org/W3185341429","https://openalex.org/W4224903949","https://openalex.org/W4285263089","https://openalex.org/W4285294723","https://openalex.org/W4292258164","https://openalex.org/W4385245566","https://openalex.org/W4385571155","https://openalex.org/W4385572634","https://openalex.org/W4385573694","https://openalex.org/W4386065803","https://openalex.org/W4386566578","https://openalex.org/W4389518966","https://openalex.org/W4390075233","https://openalex.org/W4390357571","https://openalex.org/W4390872747","https://openalex.org/W4391454521","https://openalex.org/W4393252868","https://openalex.org/W4400647053","https://openalex.org/W4402716330","https://openalex.org/W4402727764","https://openalex.org/W4402753980","https://openalex.org/W4403058886","https://openalex.org/W4403081466","https://openalex.org/W4404356490","https://openalex.org/W4405399726","https://openalex.org/W4407451021","https://openalex.org/W4410536678"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,109,145,168],"multimodal":[3,11,40,149,183],"large":[4],"language":[5,24],"models":[6,18],"(MLLMs)":[7],"have":[8],"achieved":[9],"significant":[10],"generation":[12,34],"capabilities,":[13],"akin":[14],"to":[15,38,74,128,147],"GPT-4.":[16],"These":[17],"predominantly":[19],"map":[20],"visual":[21,58,72,131],"information":[22,132],"into":[23,121],"representation":[25],"space,":[26],"leveraging":[27],"the":[28,68,76,122,163],"vast":[29],"knowledge":[30,73,150],"and":[31,60,107],"powerful":[32],"text":[33,153],"abilities":[35],"of":[36,53,70,79,125,140,166],"LLMs":[37,49,56,101,146,167],"produce":[39],"instruction-following":[41],"responses.":[42],"We":[43],"could":[44,82],"term":[45],"this":[46,90],"method":[47],"as":[48,85],"for":[50,57],"Vision":[51,86],"because":[52],"its":[54],"employing":[55],"understanding":[59,182],"reasoning,":[61],"yet":[62],"observe":[63],"that":[64,159],"these":[65],"MLLMs":[66],"neglect":[67],"potential":[69],"harnessing":[71],"enhance":[75],"overall":[77],"capabilities":[78,165],"LLMs,":[80,126],"which":[81],"be":[83,188],"regarded":[84],"Enhancing":[87],"LLMs.":[88,110],"In":[89],"paper,":[91],"we":[92,112,135],"propose":[93],"an":[94],"approach":[95],"called":[96],"MKS2,":[97],"aimed":[98],"at":[99],"enhancing":[100],"through":[102],"empowering":[103],"Multimodal":[104,141],"Knowledge":[105],"Storage":[106],"Sharing":[108],"Specifically,":[111],"introduce":[113],"Modular":[114],"Visual":[115],"Memory":[116],"(MVM),":[117],"a":[118,137],"component":[119],"integrated":[120],"internal":[123],"blocks":[124],"designed":[127],"store":[129],"open-world":[130],"efficiently.":[133],"Additionally,":[134],"present":[136],"soft":[138],"Mixture":[139],"Experts":[142],"(MoMEs)":[143],"architecture":[144],"invoke":[148],"collaboration":[151],"during":[152],"generation.":[154],"Our":[155],"comprehensive":[156],"experiments":[157],"demonstrate":[158],"MKS2":[160],"substantially":[161],"augments":[162],"reasoning":[164],"contexts":[169],"necessitating":[170],"physical":[171],"or":[172],"commonsense":[173],"knowledge.":[174],"It":[175],"also":[176],"delivers":[177],"competitive":[178],"results":[179],"on":[180],"image-text":[181],"benchmarks.":[184],"The":[185],"codes":[186],"will":[187],"available":[189],"at:":[190],"https://github.com/HITsz-TMG/MKS2-Multimodal-Knowledge-Storage-and-Sharing.":[191]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-01-17T00:00:00"}
