{"id":"https://openalex.org/W7160286336","doi":"https://doi.org/10.1109/wacv61042.2026.00585","title":"Understanding the Visual Projection Space of Multimodal LLMs","display_name":"Understanding the Visual Projection Space of Multimodal LLMs","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7160286336","doi":"https://doi.org/10.1109/wacv61042.2026.00585"},"language":null,"primary_location":{"id":"doi:10.1109/wacv61042.2026.00585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135309544","display_name":"Sungheon Jeong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137750","display_name":"UC Irvine Health","ror":"https://ror.org/03fgher32","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210137750"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sungheon Jeong","raw_affiliation_strings":["UC Irvine"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UC Irvine","institution_ids":["https://openalex.org/I4210137750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135300694","display_name":"Yoojeong Song","orcid":null},"institutions":[{"id":"https://openalex.org/I24541011","display_name":"Soonchunhyang University","ror":"https://ror.org/03qjsrb10","country_code":"KR","type":"education","lineage":["https://openalex.org/I24541011"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yoojeong Song","raw_affiliation_strings":["Soonchunhyang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Soonchunhyang University","institution_ids":["https://openalex.org/I24541011"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123459684","display_name":"H J Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I174101054","display_name":"Changwon National University","ror":"https://ror.org/04ts4qa58","country_code":"KR","type":"education","lineage":["https://openalex.org/I174101054"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyungjoon Kim","raw_affiliation_strings":["Changwon National University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Changwon National University","institution_ids":["https://openalex.org/I174101054"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.68978887,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6049","last_page":"6058"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.31029999256134033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.31029999256134033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11904","display_name":"Spatial Cognition and Navigation","score":0.0771000012755394,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.05389999970793724,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5582000017166138},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.44690001010894775},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.36399999260902405},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.2948000133037567},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.2703999876976013}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5911999940872192},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5873000025749207},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5582000017166138},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.553600013256073},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.44690001010894775},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.36399999260902405},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C123403432","wikidata":"https://www.wikidata.org/wiki/Q654068","display_name":"Visibility","level":2,"score":0.24609999358654022},{"id":"https://openalex.org/C2779332521","wikidata":"https://www.wikidata.org/wiki/Q1820694","display_name":"Legibility","level":2,"score":0.2451999932527542}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wacv61042.2026.00585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2045512849","https://openalex.org/W2126017757","https://openalex.org/W2185175083","https://openalex.org/W2560730294","https://openalex.org/W2890399523","https://openalex.org/W2952122856","https://openalex.org/W2988217457","https://openalex.org/W3105816068","https://openalex.org/W3138516171","https://openalex.org/W7133193597","https://openalex.org/W7133196460","https://openalex.org/W7133220561"],"related_works":[],"abstract_inverted_index":{"What":[0],"role":[1],"does":[2],"a":[3,9,21,24,54,130],"single":[4],"vision":[5],"token":[6],"play":[7],"inside":[8],"multimodal":[10,147],"large":[11],"language":[12],"model":[13],"(MLLM)?":[14],"Despite":[15],"recent":[16],"successes,":[17],"most":[18],"MLLMs":[19,73],"adopt":[20],"simple":[22],"design:":[23],"projected":[25],"visual":[26],"feature":[27],"z":[28],"=":[29],"P(f<inf":[30],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[31],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">x</inf>)":[32],"prepended":[33],"to":[34],"the":[35],"text":[36],"sequence.":[37],"Yet":[38],"it":[39],"remains":[40],"unclear":[41],"whether":[42],"this":[43],"vector":[44],"merely":[45],"provides":[46],"context":[47],"or":[48,119],"actively":[49],"steers":[50],"generation.":[51,148],"We":[52],"propose":[53],"geometric":[55],"probing":[56],"framework":[57],"that":[58,117],"analyzes":[59],"latent\u2013token":[60],"alignment,":[61],"intrinsic":[62],"dimensionality,":[63],"and":[64,70,101,113,136,145],"perturbation":[65],"sensitivity.":[66],"Across":[67],"four":[68],"datasets":[69],"three":[71],"representative":[72],"(LLaVA,":[74],"BLIP-2,":[75],"Kosmos-2),":[76],"we":[77],"find":[78],"clear":[79],"operating":[80],"regimes:":[81],"BLIP-2":[82],"enforces":[83],"rigid":[84],"low-rank":[85],"compression":[86],"with":[87,98,109],"strong":[88],"alignment":[89,143],"but":[90],"near-zero":[91],"sensitivity,":[92],"LLaVA":[93],"exhibits":[94],"flexible":[95],"high-dimensional":[96],"mappings":[97],"high":[99],"responsiveness,":[100],"Kosmos-2":[102],"balances":[103],"between":[104],"them.":[105],"These":[106],"signatures":[107],"correlate":[108],"downstream":[110],"behavior\u2014SQA":[111],"correctness":[112],"VQAv2":[114],"hallucination":[115],"severity\u2014showing":[116],"reduced":[118],"excessive":[120],"sensitivity":[121],"predicts":[122],"unreliable":[123],"grounding.":[124],"Our":[125],"results":[126],"highlight":[127],"geometry":[128],"as":[129],"diagnostic":[131],"lens":[132],"for":[133,140],"vision\u2013language":[134],"coupling":[135],"offer":[137],"actionable":[138],"guidance":[139],"projection":[141],"design,":[142],"objectives,":[144],"user-steerable":[146]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
