{"id":"https://openalex.org/W7153250132","doi":"https://doi.org/10.48550/arxiv.2604.08192","title":"Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings","display_name":"Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7153250132","doi":"https://doi.org/10.48550/arxiv.2604.08192"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08192","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08192","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08192","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133352614","display_name":"Yunxiang Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Yunxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133343996","display_name":"Mengmeng Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Mengmeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133318455","display_name":"Ziyu Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Ziyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133383482","display_name":"Xi Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Xi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1656000018119812,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1656000018119812,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.1096000000834465,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.0778999999165535,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.8070999979972839},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5023999810218811},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4593000113964081},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.4189000129699707},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.33340001106262207},{"id":"https://openalex.org/keywords/proxy","display_name":"Proxy (statistics)","score":0.32499998807907104},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.3188000023365021}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.8070999979972839},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6845999956130981},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5414999723434448},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5023999810218811},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49059998989105225},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4593000113964081},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.4189000129699707},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33169999718666077},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.32499998807907104},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.31040000915527344},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2718000113964081},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.27090001106262207},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08192","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08192","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08192","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08192","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reliable":[0],"generalization":[1,25,127,167,184,199],"metrics":[2,148,193],"are":[3,20,87],"fundamental":[4],"to":[5,44,57,150],"the":[6,46,98,114,134,151],"evaluation":[7,22],"of":[8,23,117,126,207],"machine":[9],"learning":[10],"models.":[11],"Especially":[12],"in":[13,67],"high-stakes":[14],"applications":[15],"where":[16],"labeled":[17],"target":[18,51,170],"data":[19],"scarce,":[21],"models'":[24,166],"performance":[26,60],"under":[27,61,185],"distribution":[28,62,187],"shift":[29],"is":[30,70,214],"a":[31,71,110,118,123,141,182],"pressing":[32],"need.":[33],"We":[34,104],"focus":[35],"on":[36,169],"two":[37,147,152],"practical":[38,153],"scenarios:":[39],"(1)":[40,155],"Before":[41,156],"deployment,":[42,55,157,174],"how":[43,56],"select":[45],"best":[47],"model":[48,59,83,94],"for":[49],"unlabeled":[50],"data?":[52],"(2)":[53,172],"After":[54,173],"monitor":[58],"shift?":[63],"The":[64],"central":[65],"need":[66],"both":[68,192],"cases":[69],"reliable":[72],"and":[73,209],"label-free":[74],"proxy":[75,79],"metric.":[76],"Yet":[77],"existing":[78,202],"metrics,":[80],"such":[81],"as":[82,90,122,140],"confidence":[84],"or":[85],"accuracy-on-the-line,":[86],"often":[88],"unreliable":[89],"they":[91],"only":[92],"assess":[93],"output":[95],"while":[96],"ignoring":[97],"internal":[99,138],"mechanisms":[100],"that":[101],"produce":[102],"them.":[103],"address":[105],"this":[106],"limitation":[107],"by":[108,204],"introducing":[109],"new":[111],"perspective:":[112],"using":[113],"inner":[115],"workings":[116],"model,":[119],"i.e.,":[120],"circuits,":[121],"predictive":[124],"metric":[125],"performance.":[128],"Leveraging":[129],"circuit":[130],"discovery,":[131],"we":[132,145,158,175],"extract":[133],"causal":[135],"interactions":[136],"between":[137],"representations":[139],"circuit,":[142],"from":[143],"which":[144,163,180],"derive":[146],"tailored":[149],"scenarios.":[154],"introduce":[159],"Dependency":[160],"Depth":[161],"Bias,":[162],"measures":[164],"different":[165,186],"capability":[168],"data.":[171],"propose":[176],"Circuit":[177],"Shift":[178],"Score,":[179],"predicts":[181],"model's":[183],"shifts.":[188],"Across":[189],"various":[190],"tasks,":[191],"demonstrate":[194],"significantly":[195],"improved":[196],"correlation":[197],"with":[198],"performance,":[200],"outperforming":[201],"proxies":[203],"an":[205],"average":[206],"13.4\\%":[208],"34.1\\%,":[210],"respectively.":[211],"Our":[212],"code":[213],"available":[215],"at":[216],"https://github.com/deep-real/GenCircuit.":[217]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-11T00:00:00"}
