{"id":"https://openalex.org/W7143450904","doi":"https://doi.org/10.48550/arxiv.2603.26127","title":"Finding Distributed Object-Centric Properties in Self-Supervised Transformers","display_name":"Finding Distributed Object-Centric Properties in Self-Supervised Transformers","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7143450904","doi":"https://doi.org/10.48550/arxiv.2603.26127"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26127","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020509971","display_name":"Samyak Rawlekar","orcid":"https://orcid.org/0009-0000-7513-3797"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rawlekar, Samyak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051277322","display_name":"Amitabh Swain","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Swain, Amitabh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130916718","display_name":"Yujun Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Yujun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130963412","display_name":"Yiwei Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yiwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130965489","display_name":"Ming-Hsuan Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Ming-Hsuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5108521995","display_name":"Narendra Ahuja","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahuja, Narendra","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5020509971"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.4242999851703644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.4242999851703644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.22709999978542328,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0868000015616417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.6969000101089478},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5020999908447266},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.48080000281333923},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.42590001225471497},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4129999876022339},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3898000121116638}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7523000240325928},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.6969000101089478},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5020999908447266},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4975999891757965},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.48080000281333923},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.42590001225471497},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4129999876022339},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3898000121116638},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3312000036239624},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.29429998993873596},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.29339998960494995},{"id":"https://openalex.org/C81192388","wikidata":"https://www.wikidata.org/wiki/Q5283169","display_name":"Distributed object","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C4292930","wikidata":"https://www.wikidata.org/wiki/Q17009341","display_name":"Distributed element model","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C48105269","wikidata":"https://www.wikidata.org/wiki/Q1141160","display_name":"Header","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.2612000107765198}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","display_name":"No poverty","score":0.5689187049865723}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Self-supervised":[0],"Vision":[1],"Transformers":[2],"(ViTs)":[3],"like":[4],"DINO":[5],"show":[6],"an":[7,45],"emergent":[8],"ability":[9],"to":[10,131,172,187],"discover":[11],"objects,":[12],"typically":[13],"observed":[14],"in":[15,32,64,95,195],"[CLS]":[16,41,117],"token":[17],"attention":[18,78,153],"maps":[19,26,98],"of":[20,35,53,162],"the":[21,40,49,60,65,96,116,126,132,160,168],"final":[22,133],"layer.":[23,134],"However,":[24],"these":[25,137],"often":[27],"contain":[28],"spurious":[29],"activations":[30],"resulting":[31],"poor":[33],"localization":[34],"objects.":[36,56,174],"This":[37,57,120],"is":[38,123],"because":[39],"token,":[42],"trained":[43],"on":[44,55,136,159,179],"image-level":[46],"objective,":[47],"summarizes":[48],"entire":[50],"image":[51],"instead":[52],"focusing":[54],"aggregation":[58],"dilutes":[59],"object-centric":[61,121,149,169,211],"information":[62,122,212],"existing":[63],"local,":[66],"patch-level":[67,77],"interactions.":[68],"We":[69,87,175],"analyze":[70],"this":[71,147,209],"by":[72,200],"computing":[73],"inter-patch":[74],"similarity":[75,97],"using":[76,208],"components":[79,103],"(query,":[80],"key,":[81],"and":[82,165,191],"value)":[83],"across":[84,125,155],"all":[85,101,156,173],"layers.":[86],"find":[88],"that:":[89],"(1)":[90],"Object-centric":[91],"properties":[92],"are":[93],"encoded":[94],"derived":[99],"from":[100],"three":[102],"($q,":[104],"k,":[105],"v$),":[106],"unlike":[107],"prior":[108],"work":[109],"that":[110,145,207],"uses":[111],"only":[112],"key":[113],"features":[114],"or":[115],"token.":[118],"(2)":[119],"distributed":[124,148,210],"network,":[127],"not":[128],"just":[129],"confined":[130],"Based":[135],"insights,":[138],"we":[139],"introduce":[140],"Object-DINO,":[141],"a":[142],"training-free":[143],"method":[144],"extracts":[146],"information.":[150],"Object-DINO":[151],"clusters":[152],"heads":[154],"layers":[157],"based":[158],"similarities":[161],"their":[163],"patches":[164],"automatically":[166],"identifies":[167],"cluster":[170],"corresponding":[171],"demonstrate":[176,206],"Object-DINO's":[177],"effectiveness":[178],"two":[180],"applications:":[181],"enhancing":[182],"unsupervised":[183],"object":[184,193],"discovery":[185],"(+3.6":[186],"+12.4":[188],"CorLoc":[189],"gains)":[190],"mitigating":[192],"hallucination":[194],"Multimodal":[196],"Large":[197],"Language":[198],"Models":[199],"providing":[201],"visual":[202],"grounding.":[203],"Our":[204],"results":[205],"improves":[213],"downstream":[214],"tasks":[215],"without":[216],"additional":[217],"training.":[218]},"counts_by_year":[],"updated_date":"2026-03-31T06:07:48.031334","created_date":"2026-03-31T00:00:00"}
