{"id":"https://openalex.org/W7153009701","doi":"https://doi.org/10.48550/arxiv.2604.08456","title":"Entropy-Gradient Grounding: Training-Free Evidence Retrieval in Vision-Language Models","display_name":"Entropy-Gradient Grounding: Training-Free Evidence Retrieval in Vision-Language Models","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7153009701","doi":"https://doi.org/10.48550/arxiv.2604.08456"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08456","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08456","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08456","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133347252","display_name":"Marcel Gr\u00f6pl","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gr\u00f6pl, Marcel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133388705","display_name":"Jaewoo Jung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jung, Jaewoo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133319474","display_name":"Seungryong Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Seungryong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133320497","display_name":"Marc Pollefeys","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pollefeys, Marc","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133349246","display_name":"Sunghwan Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Sunghwan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5133347252"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.0024999999441206455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.001500000013038516,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.447299987077713},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.43140000104904175},{"id":"https://openalex.org/keywords/framing","display_name":"Framing (construction)","score":0.3433000147342682},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.3375000059604645},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.3353999853134155},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.33230000734329224},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.32690000534057617}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6890000104904175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49950000643730164},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.447299987077713},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.43140000104904175},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3499999940395355},{"id":"https://openalex.org/C169087156","wikidata":"https://www.wikidata.org/wiki/Q2131593","display_name":"Framing (construction)","level":2,"score":0.3433000147342682},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.3375000059604645},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33500000834465027},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.30149999260902405},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27059999108314514},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2612999975681305},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.25049999356269836},{"id":"https://openalex.org/C171752962","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Kullback\u2013Leibler divergence","level":2,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08456","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08456","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08456","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08456","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5831299424171448,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"rapid":[1],"progress,":[2],"pretrained":[3],"vision-language":[4],"models":[5],"still":[6],"struggle":[7],"when":[8],"answers":[9],"depend":[10],"on":[11,16,126,143],"tiny":[12],"visual":[13,84],"details":[14],"or":[15,96],"combining":[17],"clues":[18],"spread":[19],"across":[20,129],"multiple":[21,104],"regions,":[22],"as":[23,35,67],"in":[24],"documents":[25],"and":[26,79,102,111,145],"compositional":[27],"queries.":[28],"We":[29,99],"address":[30],"this":[31,55],"by":[32],"framing":[33],"grounding":[34,62],"test-time":[36],"evidence":[37,153],"retrieval:":[38],"given":[39],"a":[40,59,118],"query,":[41],"the":[42,72,75,83,140],"model":[43],"should":[44],"actively":[45],"identify":[46],"where":[47],"to":[48,51,82,87,107,122],"look":[49],"next":[50],"resolve":[52],"ambiguity.":[53],"To":[54],"end,":[56],"we":[57,70],"propose":[58],"training-free,":[60],"model-intrinsic":[61],"method":[63],"that":[64],"uses":[65],"uncertainty":[66],"supervision.":[68],"Specifically,":[69],"compute":[71],"entropy":[73],"of":[74],"model's":[76],"next-token":[77],"distribution":[78],"backpropagate":[80],"it":[81],"token":[85],"embeddings":[86],"obtain":[88],"an":[89,113],"entropy-gradient":[90],"relevance":[91],"map,":[92],"without":[93],"auxiliary":[94],"detectors":[95],"attention-map":[97],"heuristics.":[98],"then":[100],"extract":[101],"rank":[103],"coherent":[105],"regions":[106],"support":[108],"multi-evidence":[109],"queries,":[110],"introduce":[112],"iterative":[114],"zoom-and-reground":[115],"procedure":[116],"with":[117,139],"spatial-entropy":[119],"stopping":[120],"rule":[121],"avoid":[123],"over-refinement.":[124],"Experiments":[125],"seven":[127],"benchmarks":[128],"four":[130],"VLM":[131],"architectures":[132],"demonstrate":[133],"consistent":[134],"improvements":[135],"over":[136],"existing":[137],"methods,":[138],"largest":[141],"gains":[142],"detail-critical":[144],"high-resolution":[146],"settings,":[147],"while":[148],"also":[149],"producing":[150],"more":[151],"interpretable":[152],"localizations.":[154]},"counts_by_year":[],"updated_date":"2026-04-11T06:19:08.300824","created_date":"2026-04-11T00:00:00"}
