{"id":"https://openalex.org/W7130675575","doi":"https://doi.org/10.48550/arxiv.2602.16740","title":"Quantifying LLM Attention-Head Stability: Implications for Circuit Universality","display_name":"Quantifying LLM Attention-Head Stability: Implications for Circuit Universality","publication_year":2026,"publication_date":"2026-02-17","ids":{"openalex":"https://openalex.org/W7130675575","doi":"https://doi.org/10.48550/arxiv.2602.16740"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.16740","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062413515","display_name":"Karan Bali","orcid":"https://orcid.org/0000-0003-0723-4961"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bali, Karan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126511171","display_name":"Jack Stanley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stanley, Jack","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054900845","display_name":"Praneet Suresh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suresh, Praneet","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5051587217","display_name":"Danilo Bzdok","orcid":"https://orcid.org/0000-0003-3466-6620"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bzdok, Danilo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5062413515"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.3682999908924103,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.3682999908924103,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.27250000834465027,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.0430000014603138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/universality","display_name":"Universality (dynamical systems)","score":0.6366999745368958},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.6111000180244446},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5748999714851379},{"id":"https://openalex.org/keywords/electronic-circuit","display_name":"Electronic circuit","score":0.5145999789237976},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5016000270843506},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.44290000200271606}],"concepts":[{"id":"https://openalex.org/C183992945","wikidata":"https://www.wikidata.org/wiki/Q2495574","display_name":"Universality (dynamical systems)","level":2,"score":0.6366999745368958},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.6111000180244446},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5748999714851379},{"id":"https://openalex.org/C134146338","wikidata":"https://www.wikidata.org/wiki/Q1815901","display_name":"Electronic circuit","level":2,"score":0.5145999789237976},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5098000168800354},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5016000270843506},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.44290000200271606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43059998750686646},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.4099000096321106},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4023999869823456},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.36230000853538513},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.323199987411499},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3050000071525574},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C32946077","wikidata":"https://www.wikidata.org/wiki/Q618079","display_name":"Network analysis","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.26100000739097595},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2556999921798706}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.16740","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.16740","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.16740","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.16740","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.7111727595329285,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"mechanistic":[1],"interpretability,":[2],"recent":[3],"work":[4],"scrutinizes":[5],"transformer":[6,79],"\"circuits\"":[7],"-":[8],"sparse,":[9],"mono":[10],"or":[11,53],"multi":[12],"layer":[13,87],"sub":[14],"computations,":[15],"that":[16,105],"may":[17],"reflect":[18],"human":[19],"understandable":[20],"functions.":[21],"Yet,":[22],"these":[23],"network":[24],"circuits":[25,48,170],"are":[26,109],"rarely":[27],"acid-tested":[28],"for":[29,177],"their":[30,136],"stability":[31,74,150],"across":[32,51,96,151],"different":[33],"instances":[34],"of":[35,82,169,186],"the":[36,110,114,139,157,166],"same":[37,140],"deep":[38],"learning":[39],"architecture.":[40],"Without":[41],"this,":[42],"it":[43],"remains":[44],"unclear":[45],"whether":[46],"reported":[47],"emerge":[49],"universally":[50],"labs":[52],"turn":[54],"out":[55],"to":[56,59],"be":[57],"idiosyncratic":[58],"a":[60],"particular":[61],"estimation":[62],"instance,":[63],"potentially":[64],"limiting":[65],"confidence":[66],"in":[67,76,128],"safety-critical":[68],"settings.":[69],"Here,":[70],"we":[71],"systematically":[72],"study":[73],"across-refits":[75],"increasingly":[77],"complex":[78],"language":[80],"models":[81,120],"various":[83],"sizes.":[84],"We":[85],"quantify,":[86],"by":[88],"layer,":[89],"how":[90],"similarly":[91],"attention":[92],"heads":[93,108,127],"learn":[94],"representations":[95],"independently":[97],"initialized":[98],"training":[99],"runs.":[100],"Our":[101,163],"rigorous":[102],"experiments":[103],"show":[104],"(1)":[106],"middle-layer":[107],"least":[111],"stable":[112],"yet":[113,174],"most":[115],"representationally":[116],"distinct;":[117],"(2)":[118],"deeper":[119,129],"exhibit":[121],"stronger":[122],"mid-depth":[123],"divergence;":[124],"(3)":[125],"unstable":[126],"layers":[130],"become":[131],"more":[132],"functionally":[133],"important":[134],"than":[135],"peers":[137],"from":[138],"layer;":[141],"(4)":[142],"applying":[143],"weight":[144],"decay":[145],"optimization":[146],"substantially":[147],"improves":[148],"attention-head":[149],"random":[152],"model":[153],"initializations;":[154],"and":[155],"(5)":[156],"residual":[158],"stream":[159],"is":[160],"comparatively":[161],"stable.":[162],"findings":[164],"establish":[165],"cross-instance":[167],"robustness":[168],"as":[171],"an":[172],"essential":[173],"underappreciated":[175],"prerequisite":[176],"scalable":[178],"oversight,":[179],"drawing":[180],"contours":[181],"around":[182],"possible":[183],"white-box":[184],"monitorability":[185],"AI":[187],"systems.":[188]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-02-21T00:00:00"}
