{"id":"https://openalex.org/W7126174289","doi":"https://doi.org/10.48550/arxiv.2601.21621","title":"Similarity of Processing Steps in Vision Model Representations","display_name":"Similarity of Processing Steps in Vision Model Representations","publication_year":2026,"publication_date":"2026-01-29","ids":{"openalex":"https://openalex.org/W7126174289","doi":"https://doi.org/10.48550/arxiv.2601.21621"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.21621","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124313016","display_name":"Mat\u00e9o Mahaut","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahaut, Mat\u00e9o","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122418044","display_name":"Marco Baroni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baroni, Marco","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5541999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5541999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.1160999983549118,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.07029999792575836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.42719998955726624},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.42570000886917114},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.41830000281333923},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41600000858306885},{"id":"https://openalex.org/keywords/information-processing","display_name":"Information processing","score":0.3675999939441681},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.33070001006126404}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6284000277519226},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6007999777793884},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.42719998955726624},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.42570000886917114},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.41830000281333923},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41600000858306885},{"id":"https://openalex.org/C87868495","wikidata":"https://www.wikidata.org/wiki/Q750843","display_name":"Information processing","level":2,"score":0.3675999939441681},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3312000036239624},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.33070001006126404},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.3203999996185303},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2806999981403351},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2709999978542328},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25380000472068787},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.251800000667572},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.21621","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2601.21621","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.21621","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.21621","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.46178123354911804,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"literature":[1,28],"suggests":[2],"that":[3,30,70,111],"the":[4,6,8,59,68,91,100,121,131,163,168,184],"bigger":[5],"model,":[7],"more":[9,180],"likely":[10],"it":[11],"is":[12,32],"to":[13,15,48,58,72,157,162],"converge":[14,57],"similar,":[16,39],"``universal''":[17],"representations,":[18,124,176],"despite":[19],"different":[20,76,83,87,106,118],"training":[21],"objectives,":[22],"datasets,":[23],"or":[24],"modalities.":[25],"While":[26],"this":[27],"shows":[29],"there":[31],"an":[33],"area":[34],"where":[35],"model":[36,84,175],"representations":[37,50,74,85,158],"are":[38,104],"we":[40,79],"study":[41,67],"here":[42],"how":[43],"vision":[44],"models":[45,96,119,147,153],"might":[46],"get":[47],"those":[49],"--":[51],"in":[52,75,117,140,187],"particular,":[53],"do":[54],"they":[55],"also":[56,148],"same":[60],"intermediate":[61],"steps":[62,102],"and":[63,145,170,177],"operations?":[64],"We":[65,89,109],"therefore":[66],"processes":[69,186],"lead":[71],"convergent":[73],"models.":[77,108,189],"First,":[78],"quantify":[80],"distance":[81],"between":[82,95,107,174],"at":[86,114],"stages.":[88],"follow":[90],"evolution":[92],"of":[93,172,183],"distances":[94],"throughout":[97],"processing,":[98],"identifying":[99],"processing":[101],"which":[103],"most":[105,122],"find":[110],"while":[112],"layers":[113],"similar":[115,123],"positions":[116],"have":[120],"strong":[125],"differences":[126],"remain.":[127],"Classifier":[128],"models,":[129],"unlike":[130],"others,":[132],"will":[133],"discard":[134],"information":[135],"about":[136],"low-level":[137],"image":[138,188],"statistics":[139],"their":[141],"final":[142],"layers.":[143],"CNN-":[144],"transformer-based":[146],"behave":[149],"differently,":[150],"with":[151],"transformer":[152],"applying":[154],"smoother":[155],"changes":[156],"from":[159],"one":[160],"layer":[161],"next.":[164],"These":[165],"distinctions":[166],"clarify":[167],"level":[169],"nature":[171],"convergence":[173],"enables":[178],"a":[179],"qualitative":[181],"account":[182],"underlying":[185]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-01T00:00:00"}
