{"id":"https://openalex.org/W7158777241","doi":"https://doi.org/10.48550/arxiv.2604.26404","title":"Decoupled Prototype Matching with Vision Foundation Models for Few-Shot Industrial Object Detection","display_name":"Decoupled Prototype Matching with Vision Foundation Models for Few-Shot Industrial Object Detection","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7158777241","doi":"https://doi.org/10.48550/arxiv.2604.26404"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.26404","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26404","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.26404","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128017390","display_name":"Hari Prasanth S. M.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M., Hari Prasanth S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066749549","display_name":"Nilusha Jayawickrama","orcid":"https://orcid.org/0000-0003-1188-5521"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jayawickrama, Nilusha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5061093557","display_name":"Risto Ojala","orcid":"https://orcid.org/0000-0003-0865-1775"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ojala, Risto","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5411999821662903,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5411999821662903,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.16590000689029694,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.05609999969601631,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.7864000201225281},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7605999708175659},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5787000060081482},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5719000101089478},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5641000270843506},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5570999979972839},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.546999990940094},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5324000120162964},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5074999928474426}],"concepts":[{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.7864000201225281},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7605999708175659},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7470999956130981},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7269999980926514},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.583899974822998},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5787000060081482},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5719000101089478},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5641000270843506},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5570999979972839},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.546999990940094},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5324000120162964},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5074999928474426},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46779999136924744},{"id":"https://openalex.org/C182521987","wikidata":"https://www.wikidata.org/wiki/Q2493877","display_name":"Viola\u2013Jones object detection framework","level":5,"score":0.4456999897956848},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.41600000858306885},{"id":"https://openalex.org/C71681937","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object-class detection","level":5,"score":0.40939998626708984},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.40700000524520874},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.4047999978065491},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.34389999508857727},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33799999952316284},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.31949999928474426},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C20894473","wikidata":"https://www.wikidata.org/wiki/Q1116105","display_name":"Object model","level":3,"score":0.2732999920845032},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C194789388","wikidata":"https://www.wikidata.org/wiki/Q17855283","display_name":"CAD","level":2,"score":0.2522999942302704},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.26404","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26404","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.26404","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26404","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.6802486777305603,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Industrial":[0],"object":[1,36,95,139],"detection":[2,37,59,119,140,146,157],"systems":[3],"typically":[4],"rely":[5],"on":[6,121],"large":[7,181],"annotated":[8,182],"datasets,":[9],"which":[10],"are":[11,50,97,106],"expensive":[12],"to":[13,17,66,153,165],"collect":[14],"and":[15,103,108],"challenging":[16],"maintain":[18],"in":[19,38],"industrial":[20,40,124,192],"scenarios":[21],"where":[22,42],"the":[23,32,118,127,136,154,160,187],"inventory":[24],"of":[25,34,47,81],"objects":[26,68,168],"changes":[27],"frequently.":[28],"This":[29],"work":[30],"addresses":[31],"challenge":[33],"few-shot":[35],"such":[39],"scenarios,":[41],"only":[43,170],"a":[44,58,78,89,100,171],"limited":[45],"number":[46],"labeled":[48],"samples":[49,83],"available":[51],"for":[52,129,190],"newly":[53],"introduced":[54],"objects.":[55],"We":[56,116,143],"present":[57],"framework":[60],"that":[61],"leverages":[62],"vision":[63],"foundation":[64],"models":[65,179],"recognize":[67],"with":[69,110],"minimal":[70],"supervision.":[71],"The":[72],"method":[73,120,162],"constructs":[74],"class":[75,111],"prototypes":[76,112],"from":[77,126],"small":[79],"set":[80],"reference":[82,173],"by":[84,150],"extracting":[85],"feature":[86,104],"representations.":[87],"For":[88],"given":[90],"query":[91],"scene":[92],"during":[93],"inference,":[94],"regions":[96],"generated":[98],"using":[99,113,169],"segmentation":[101],"model,":[102],"embeddings":[105],"extracted":[107],"matched":[109],"similarity":[114],"matching.":[115],"evaluate":[117],"three":[122],"established":[123],"datasets":[125],"Benchmark":[128],"6D":[130],"Object":[131],"Pose":[132],"Estimation":[133],"benchmark":[134],"following":[135],"official":[137],"2D":[138],"evaluation":[141],"protocol.":[142],"demonstrate":[144],"competitive":[145],"performance,":[147],"improving":[148],"AP":[149],"6.9%":[151],"compared":[152],"state-of-the-art":[155],"training-free":[156],"methods.":[158],"Furthermore,":[159],"presented":[161],"is":[163],"able":[164],"onboard":[166],"new":[167],"few":[172],"images,":[174],"without":[175],"requiring":[176],"any":[177],"CAD":[178],"or":[180],"datasets.":[183],"These":[184],"properties":[185],"make":[186],"approach":[188],"well-suited":[189],"real-world":[191],"applications.":[193]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-01T00:00:00"}
