{"id":"https://openalex.org/W7158231876","doi":"https://doi.org/10.48550/arxiv.2604.25855","title":"SIEVES: Selective Prediction Generalizes through Visual Evidence Scoring","display_name":"SIEVES: Selective Prediction Generalizes through Visual Evidence Scoring","publication_year":2026,"publication_date":"2026-04-28","ids":{"openalex":"https://openalex.org/W7158231876","doi":"https://doi.org/10.48550/arxiv.2604.25855"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.25855","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25855","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.25855","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126637668","display_name":"Hector G. Rodriguez","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rodriguez, Hector G.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134838456","display_name":"Marcus Rohrbach","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rohrbach, Marcus","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5126637668"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9754999876022339,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9754999876022339,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.007799999788403511,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.004800000227987766,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantic-reasoner","display_name":"Semantic reasoner","score":0.8284000158309937},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7006000280380249},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.430400013923645},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.38659998774528503},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.32690000534057617},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3046000003814697}],"concepts":[{"id":"https://openalex.org/C9616225","wikidata":"https://www.wikidata.org/wiki/Q3929429","display_name":"Semantic reasoner","level":2,"score":0.8284000158309937},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7452999949455261},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7006000280380249},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5738999843597412},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5386999845504761},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.430400013923645},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.38659998774528503},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C87833898","wikidata":"https://www.wikidata.org/wiki/Q1060280","display_name":"Advanced driver assistance systems","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.25699999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.25855","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25855","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.25855","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25855","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"large":[1],"language":[2],"models":[3,112,226],"(MLLMs)":[4],"achieve":[5],"ever-stronger":[6],"performance":[7],"on":[8,69,86,161],"visual-language":[9],"tasks.":[10],"Even":[11],"as":[12,201],"traditional":[13],"visual":[14,116],"question":[15],"answering":[16],"(VQA)":[17],"benchmarks":[18,164,223],"approach":[19],"saturation,":[20],"reliable":[21,105],"deployment":[22],"requires":[23],"satisfying":[24],"low":[25],"error":[26],"tolerances":[27],"in":[28,107],"real-world,":[29],"out-of-distribution":[30],"(OOD)":[31],"scenarios.":[32],"Precisely,":[33],"selective":[34,78],"prediction":[35,79],"aims":[36],"to":[37,50,64,113,127,158,173,179,190,195,211],"improve":[38],"coverage,":[39],"i.e.":[40],"the":[41,45,129,132,136,182,185],"share":[42],"of":[43,131,184],"inputs":[44,141],"system":[46],"answers,":[47],"while":[48,118],"adhering":[49],"a":[51,61,74,122],"user-defined":[52],"risk":[53],"level.":[54],"This":[55],"is":[56,239],"typically":[57],"achieved":[58],"by":[59,135,156],"assigning":[60],"confidence":[62,83],"score":[63],"each":[65],"answer":[66],"and":[67,120,142,170,203,224,229],"abstaining":[68],"those":[70,209],"that":[71,124,146,216],"fall":[72],"below":[73],"certain":[75],"threshold.":[76],"Existing":[77],"methods":[80],"estimate":[81,128],"implicit":[82],"scores,":[84],"relying":[85],"model":[87,140],"internal":[88],"signals":[89],"like":[90],"logits":[91],"or":[92,198,233,236],"hidden":[93],"representations,":[94],"which":[95],"are":[96],"not":[97],"available":[98,241],"for":[99],"frontier":[100],"closed-sourced":[101],"models.":[102],"To":[103],"enable":[104],"generalization":[106,178],"VQA,":[108],"we":[109],"require":[110],"reasoner":[111,137,225],"produce":[114],"localized":[115],"evidence":[117],"answering,":[119],"design":[121,183],"selector":[123,187],"explicitly":[125],"learns":[126],"quality":[130],"localization":[133],"provided":[134],"using":[138],"only":[139],"outputs.":[143],"We":[144,214],"show":[145],"SIEVES":[147,186,217],"(Selective":[148],"Prediction":[149],"through":[150],"Visual":[151],"Evidence":[152],"Scoring)":[153],"improves":[154],"coverage":[155,206],"up":[157],"three":[159],"times":[160],"challenging":[162],"OOD":[163,180,222],"(V*":[165],"Bench,":[166],"HR-Bench-8k,":[167],"MME-RealWorld-Lite,":[168],"VizWiz,":[169],"AdVQA),":[171],"compared":[172],"non-grounding":[174],"baselines.":[175],"Beyond":[176],"better":[177],"tasks,":[181],"enables":[188],"transfer":[189],"proprietary":[191],"reasoners":[192],"without":[193,231],"access":[194],"their":[196],"weights":[197],"logits,":[199],"such":[200],"o3":[202],"Gemini-3-Pro,":[204],"providing":[205],"boosts":[207],"beyond":[208],"attributable":[210],"accuracy":[212],"alone.":[213],"highlight":[215],"generalizes":[218],"across":[219],"all":[220],"tested":[221],"(Pixel-Reasoner,":[227],"o3,":[228],"Gemini-3-Pro),":[230],"benchmark-":[232],"reasoner-specific":[234],"training":[235],"adaptation.":[237],"Code":[238],"publicly":[240],"at":[242],"https://github.com/hector-gr/SIEVES":[243],".":[244]},"counts_by_year":[],"updated_date":"2026-05-16T06:04:12.930555","created_date":"2026-04-30T00:00:00"}
