{"id":"https://openalex.org/W4405254344","doi":"https://doi.org/10.48550/arxiv.2412.06639","title":"Beyond Scalars: Concept-Based Alignment Analysis in Vision Transformers","display_name":"Beyond Scalars: Concept-Based Alignment Analysis in Vision Transformers","publication_year":2024,"publication_date":"2024-12-09","ids":{"openalex":"https://openalex.org/W4405254344","doi":"https://doi.org/10.48550/arxiv.2412.06639"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2412.06639","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.06639","pdf_url":"https://arxiv.org/pdf/2412.06639","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.06639","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082427123","display_name":"Johanna Vielhaben","orcid":"https://orcid.org/0000-0001-9399-5710"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Vielhaben, Johanna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082253730","display_name":"Dilyara Bareeva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bareeva, Dilyara","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092691971","display_name":"Jim Berend","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Berend, Jim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026451495","display_name":"Wojciech Samek","orcid":"https://orcid.org/0000-0002-6283-3265"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Samek, Wojciech","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5034797797","display_name":"Nils Strodthoff","orcid":"https://orcid.org/0000-0003-4447-0162"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Strodthoff, Nils","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5082427123"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.4864000082015991,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.4864000082015991,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5249859094619751},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4906269609928131},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43045127391815186},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38571876287460327},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1971392035484314},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.09623917937278748},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.05123397707939148}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5249859094619751},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4906269609928131},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43045127391815186},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38571876287460327},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1971392035484314},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09623917937278748},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.05123397707939148}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2412.06639","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.06639","pdf_url":"https://arxiv.org/pdf/2412.06639","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:zenodo.org:18199749","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arXiv.2412.06639","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The Thirty-ninth Annual Conference on Neural Information Processing Systems NeurIPS 2025, San Diego","raw_type":"info:eu-repo/semantics/preprint"},{"id":"doi:10.48550/arxiv.2412.06639","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.06639","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.06639","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.06639","pdf_url":"https://arxiv.org/pdf/2412.06639","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Vision":[0],"transformers":[1],"(ViTs)":[2],"can":[3,151],"be":[4,161],"trained":[5],"using":[6],"various":[7],"learning":[8],"paradigms,":[9],"from":[10,221],"fully":[11],"supervised":[12],"to":[13,160,166],"self-supervised.":[14],"Diverse":[15],"training":[16],"protocols":[17],"often":[18],"result":[19],"in":[20,39,54,85,129,211,233],"significantly":[21],"different":[22,98,223],"feature":[23,86,140],"spaces,":[24],"which":[25,75],"are":[26],"usually":[27],"compared":[28],"through":[29],"alignment":[30,34,70,80,190,205,217],"analysis.":[31],"However,":[32],"current":[33],"measures":[35],"quantify":[36],"this":[37,66],"relationship":[38],"terms":[40,130],"of":[41,56,79,106,110,126,131,138,177,193,199,219,237],"a":[42,77,124,182,212,231],"single":[43,82],"scalar":[44,62],"value,":[45],"obscuring":[46],"the":[47,60,103,127,136,139,146,197,234],"distinctions":[48],"between":[49,173,191],"common":[50],"and":[51,94,186],"unique":[52,95],"features":[53,159],"pairs":[55,192],"representations":[57,220],"that":[58,133,226],"share":[59],"same":[61],"alignment.":[63],"We":[64,195],"address":[65,115],"limitation":[67],"by":[68,163],"combining":[69],"analysis":[71,206,218],"with":[72,230],"concept":[73,174,202],"discovery,":[74],"enables":[76],"breakdown":[78],"into":[81],"concepts":[83,96,107,132,144],"encoded":[84],"space.":[87],"This":[88],"fine-grained":[89],"comparison":[90],"reveals":[91,225],"both":[92],"universal":[93],"across":[97],"representations,":[99,179],"as":[100,102,145],"well":[101],"internal":[104],"structure":[105,149,236],"within":[108],"each":[109],"them.":[111],"Our":[112],"methodological":[113],"contributions":[114],"two":[116,178],"key":[117],"prerequisites":[118],"for":[119,189,204],"concept-based":[120,216],"alignment:":[121],"1)":[122],"For":[123],"description":[125],"representation":[128],"faithfully":[134],"capture":[135],"geometry":[137],"space,":[141],"we":[142,180],"define":[143],"most":[147],"general":[148],"they":[150],"possibly":[152],"form":[153],"-":[154],"arbitrary":[155],"manifolds,":[156],"allowing":[157],"hidden":[158],"described":[162],"their":[164],"proximity":[165,175],"these":[167],"manifolds.":[168],"2)":[169],"To":[170],"measure":[171],"distances":[172],"scores":[176],"use":[181],"generalized":[183],"Rand":[184],"index":[185],"partition":[187],"it":[188],"concepts.":[194],"confirm":[196],"superiority":[198],"our":[200],"novel":[201],"definition":[203],"over":[207],"existing":[208],"linear":[209],"baselines":[210],"sanity":[213],"check.":[214],"The":[215],"four":[222],"ViTs":[224],"increased":[227],"supervision":[228],"correlates":[229],"reduction":[232],"semantic":[235],"learned":[238],"representations.":[239]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
