{"id":"https://openalex.org/W7160435958","doi":"https://doi.org/10.48550/arxiv.2605.03456","title":"VL-SAM-v3: Memory-Guided Visual Priors for Open-World Object Detection","display_name":"VL-SAM-v3: Memory-Guided Visual Priors for Open-World Object Detection","publication_year":2026,"publication_date":"2026-05-05","ids":{"openalex":"https://openalex.org/W7160435958","doi":"https://doi.org/10.48550/arxiv.2605.03456"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.03456","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03456","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.03456","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135482693","display_name":"Chih-Chung Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chih-Chung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135515630","display_name":"Zhiwei Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Zhiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135472250","display_name":"Yongtao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yongtao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.7032999992370605,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.7032999992370605,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.09929999709129333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.07479999959468842,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.8611000180244446},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.652999997138977},{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.5796999931335449},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5511999726295471},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5041000247001648},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.490200012922287},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.45809999108314514},{"id":"https://openalex.org/keywords/saliency-map","display_name":"Saliency map","score":0.3659000098705292}],"concepts":[{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.8611000180244446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7409999966621399},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6948000192642212},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.652999997138977},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.5796999931335449},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5511999726295471},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5041000247001648},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4975000023841858},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.490200012922287},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.45809999108314514},{"id":"https://openalex.org/C2779679900","wikidata":"https://www.wikidata.org/wiki/Q25304431","display_name":"Saliency map","level":3,"score":0.3659000098705292},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.319599986076355},{"id":"https://openalex.org/C2986089797","wikidata":"https://www.wikidata.org/wiki/Q6501338","display_name":"Visual attention","level":3,"score":0.3124000132083893},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C137270730","wikidata":"https://www.wikidata.org/wiki/Q120811","display_name":"Detection theory","level":3,"score":0.2815999984741211},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2615000009536743}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.03456","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03456","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.03456","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03456","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4705997407436371,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Open-world":[0],"object":[1],"detection":[2,83,134,161],"aims":[3],"to":[4],"localize":[5],"and":[6,34,53,68,105,120,148,166],"recognize":[7],"objects":[8],"beyond":[9],"a":[10,27,77,101,141,179],"fixed":[11],"closed-set":[12],"label":[13],"space.":[14],"It":[15],"is":[16],"commonly":[17],"divided":[18],"into":[19,108],"two":[20,109],"categories,":[21,67],"i.e.,":[22,113],"open-vocabulary":[23,147,165,181],"detection,":[24,36],"which":[25,37,56],"assumes":[26],"predefined":[28],"category":[29],"list":[30],"at":[31],"test":[32],"time,":[33],"open-ended":[35,149,167],"requires":[38],"generating":[39],"candidate":[40,91],"categories":[41,92],"during":[42],"the":[43,132,186,189],"inference.":[44,150],"Existing":[45],"methods":[46],"rely":[47],"primarily":[48],"on":[49,154,173],"coarse":[50],"textual":[51],"semantics":[52],"parametric":[54],"knowledge,":[55],"often":[57],"provide":[58],"insufficient":[59],"visual":[60,87,98,111],"evidence":[61],"for":[62,116,123],"fine-grained":[63],"appearance":[64],"variation,":[65],"rare":[66,174],"cluttered":[69],"scenes.":[70],"In":[71],"this":[72],"paper,":[73],"we":[74],"propose":[75],"VL-SAM-v3,":[76],"unified":[78],"framework":[79],"that":[80,145,157],"augments":[81],"open-world":[82],"with":[84,131,169,178],"retrieval-grounded":[85],"external":[86],"memory.":[88],"Specifically,":[89],"once":[90],"are":[93,129],"available,":[94],"VL-SAM-v3":[95,158],"retrieves":[96],"relevant":[97],"prototypes":[99],"from":[100],"non-parametric":[102],"memory":[103],"bank":[104],"transforms":[106],"them":[107],"complementary":[110],"priors,":[112],"sparse":[114],"priors":[115,122,128],"instance-level":[117],"spatial":[118],"anchoring":[119],"dense":[121],"class-aware":[124],"local":[125],"context.":[126],"These":[127],"integrated":[130],"original":[133],"prompts":[135],"via":[136],"Memory-Guided":[137],"Prompt":[138],"Refinement,":[139],"enabling":[140],"shared":[142],"retrieval-and-refinement":[143,191],"mechanism":[144],"supports":[146],"Extensive":[151],"zero-shot":[152],"experiments":[153,177],"LVIS":[155],"show":[156],"consistently":[159],"improves":[160],"performance":[162],"under":[163],"both":[164],"inference,":[168],"particularly":[170],"strong":[171],"gains":[172],"categories.":[175],"Moreover,":[176],"stronger":[180],"detector":[182],"(i.e.,":[183],"SAM3)":[184],"validate":[185],"generality":[187],"of":[188],"proposed":[190],"mechanism.":[192]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-07T00:00:00"}
