{"id":"https://openalex.org/W7138429609","doi":"https://doi.org/10.48550/arxiv.2603.13652","title":"Causal Attribution via Activation Patching","display_name":"Causal Attribution via Activation Patching","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7138429609","doi":"https://doi.org/10.48550/arxiv.2603.13652"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13652","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13652","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13652","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041305530","display_name":"Ahmad Izadi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Izadi, Amirmohammad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114620189","display_name":"Mohammadali Banayeeanzade","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Banayeeanzade, Mohammadali","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129748851","display_name":"Alireza Mirrokni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mirrokni, Alireza","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102776829","display_name":"Hosein Hasani","orcid":"https://orcid.org/0009-0002-6234-897X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hasani, Hosein","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129734240","display_name":"Mobin Bagherian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bagherian, Mobin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107516044","display_name":"Faridoun Mehri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mehri, Faridoun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5069082023","display_name":"Mahdieh Soleymani Baghshah","orcid":"https://orcid.org/0000-0002-1971-6231"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baghshah, Mahdieh Soleymani","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5041305530"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6251999735832214,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6251999735832214,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.10029999911785126,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.061400000005960464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/attribution","display_name":"Attribution","score":0.8370000123977661},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4887999892234802},{"id":"https://openalex.org/keywords/causal-reasoning","display_name":"Causal reasoning","score":0.45239999890327454},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4462999999523163},{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.43070000410079956},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.32269999384880066}],"concepts":[{"id":"https://openalex.org/C143299363","wikidata":"https://www.wikidata.org/wiki/Q900584","display_name":"Attribution","level":2,"score":0.8370000123977661},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.507099986076355},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4887999892234802},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.45239999890327454},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4462999999523163},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44359999895095825},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.43070000410079956},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.42170000076293945},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.38769999146461487},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.25369998812675476},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25119999051094055},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13652","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13652","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13652","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13652","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","score":0.722491979598999,"display_name":"No poverty"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Attribution":[0,86],"methods":[1,204],"for":[2,66],"Vision":[3],"Transformers":[4],"(ViTs)":[5],"aim":[6],"to":[7,30,73,99],"identify":[8],"image":[9,41,97],"regions":[10],"that":[11,47,188],"influence":[12,174],"model":[13],"predictions,":[14],"but":[15],"producing":[16],"faithful":[17,208],"and":[18,25,59,138,197,205,209],"well-localized":[19],"attributions":[20],"remains":[21],"challenging.":[22],"Existing":[23],"gradient-based":[24],"perturbation-based":[26],"techniques":[27],"often":[28],"fail":[29,72],"isolate":[31],"the":[32,75,81,93,100,123,140,145,153,161],"causal":[33,154,165],"contribution":[34,94],"of":[35,95,136,156,172],"internal":[36,76,107,158],"representations":[37,159],"associated":[38],"with":[39],"individual":[40,96],"patches.":[42],"The":[43,148,164],"key":[44],"challenge":[45],"is":[46,50],"class-relevant":[48,177],"evidence":[49,77,178],"formed":[51],"through":[52],"interactions":[53],"between":[54],"patch":[55,67,173],"tokens":[56],"across":[57],"layers,":[58],"input-level":[60],"perturbations":[61],"can":[62,189],"be":[63],"poor":[64],"proxies":[65],"importance,":[68],"since":[69],"they":[70],"may":[71],"reconstruct":[74],"actually":[78],"used":[79],"by":[80,103,175],"model.":[82],"We":[83],"propose":[84],"Causal":[85],"via":[87],"Activation":[88],"Patching":[89],"(CAAP),":[90],"which":[91],"estimates":[92],"patches":[98],"ViT's":[101],"prediction":[102],"directly":[104],"intervening":[105],"on":[106,160],"activations":[108,126],"rather":[109],"than":[110],"using":[111],"learned":[112],"masks":[113],"or":[114],"synthetic":[115],"perturbation":[116],"patterns.":[117],"For":[118],"each":[119],"patch,":[120],"CAAP":[121,200],"inserts":[122],"corresponding":[124],"source-image":[125],"into":[127],"a":[128,169],"neutral":[129],"target":[130],"context":[131],"over":[132],"an":[133],"intermediate":[134],"range":[135],"layers":[137],"uses":[139],"resulting":[141,149],"target-class":[142],"score":[143],"as":[144,168],"attribution":[146,150],"signal.":[147],"map":[151],"reflects":[152],"effect":[155],"patch-associated":[157],"model's":[162],"prediction.":[163],"intervention":[166],"serves":[167],"principled":[170],"measure":[171],"capturing":[176],"after":[179],"initial":[180],"representation":[181],"formation,":[182],"while":[183],"avoiding":[184],"late-layer":[185],"global":[186],"mixing":[187],"reduce":[190],"spatial":[191],"specificity.":[192],"Across":[193],"multiple":[194],"ViT":[195],"backbones":[196],"standard":[198],"metrics,":[199],"significantly":[201],"outperforms":[202],"existing":[203],"produces":[206],"more":[207],"localized":[210],"attributions.":[211]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
