{"id":"https://openalex.org/W7137980530","doi":"https://doi.org/10.48550/arxiv.2603.14665","title":"Gradient Atoms: Unsupervised Discovery, Attribution and Steering of Model Behaviors via Sparse Decomposition of Training Gradients","display_name":"Gradient Atoms: Unsupervised Discovery, Attribution and Steering of Model Behaviors via Sparse Decomposition of Training Gradients","publication_year":2026,"publication_date":"2026-03-15","ids":{"openalex":"https://openalex.org/W7137980530","doi":"https://doi.org/10.48550/arxiv.2603.14665"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.14665","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.14665","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129710467","display_name":"J Rosser","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rosser, J","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5129710467"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6764000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6764000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.051500000059604645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.031700000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ask-price","display_name":"Ask price","score":0.6970999836921692},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.48510000109672546},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4812999963760376},{"id":"https://openalex.org/keywords/attribution","display_name":"Attribution","score":0.44519999623298645},{"id":"https://openalex.org/keywords/co-training","display_name":"Co-training","score":0.43059998750686646},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.390500009059906},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.3562999963760376}],"concepts":[{"id":"https://openalex.org/C90329073","wikidata":"https://www.wikidata.org/wiki/Q914232","display_name":"Ask price","level":2,"score":0.6970999836921692},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6802999973297119},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5701000094413757},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.48510000109672546},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4812999963760376},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4537000060081482},{"id":"https://openalex.org/C143299363","wikidata":"https://www.wikidata.org/wiki/Q900584","display_name":"Attribution","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C2776959682","wikidata":"https://www.wikidata.org/wiki/Q17005296","display_name":"Co-training","level":3,"score":0.43059998750686646},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.390500009059906},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.35260000824928284},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.334199994802475},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2727999985218048},{"id":"https://openalex.org/C169087156","wikidata":"https://www.wikidata.org/wiki/Q2131593","display_name":"Framing (construction)","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.26190000772476196},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.257099986076355}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.14665","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.14665","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5104534029960632}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0],"data":[1],"attribution":[2],"(TDA)":[3],"methods":[4,28,108],"ask":[5,61],"which":[6],"training":[7,41,73],"documents":[8],"are":[9,29],"responsible":[10],"for":[11],"a":[12,34,83,89,95],"model":[13,152],"behavior.":[14],"However,":[15],"models":[16],"often":[17],"learn":[18],"broad":[19],"concepts":[20],"shared":[21,90],"across":[22],"many":[23],"examples.":[24],"Moreover,":[25],"existing":[26],"TDA":[27],"supervised":[30],"--":[31,45,123,130],"they":[32],"require":[33],"predefined":[35],"query":[36,179],"behavior,":[37],"then":[38],"score":[39],"every":[40],"document":[42],"against":[43],"it":[44],"making":[46],"them":[47,143],"both":[48],"expensive":[49],"and":[50,172],"unable":[51],"to":[52,60,158,163],"surface":[53],"behaviors":[54,122,180],"the":[55,103,116,176],"user":[56],"did":[57],"not":[58,110],"think":[59],"about.":[62],"We":[63],"present":[64],"Gradient":[65],"Atoms,":[66],"an":[67],"unsupervised":[68],"method":[69,166],"that":[70,106],"decomposes":[71],"per-document":[72,107],"gradients":[74],"into":[75],"sparse":[76],"components":[77],"(\"atoms\")":[78],"via":[79],"dictionary":[80],"learning":[81],"in":[82,151],"preconditioned":[84],"eigenspace.":[85],"Each":[86],"atom":[87],"captures":[88],"update":[91],"direction":[92],"induced":[93],"by":[94],"cluster":[96],"of":[97,175,178,181],"functionally":[98],"similar":[99],"documents,":[100],"directly":[101],"recovering":[102],"collective":[104],"structure":[105],"do":[109],"address.":[111],"Among":[112],"500":[113],"discovered":[114],"atoms,":[115],"highest-coherence":[117],"ones":[118],"recover":[119],"interpretable":[120],"task-type":[121],"refusal,":[124],"arithmetic,":[125],"yes/no":[126],"classification,":[127],"trivia":[128],"QA":[129],"without":[131],"any":[132],"behavioral":[133],"labels.":[134],"These":[135],"atoms":[136],"double":[137],"as":[138,144],"effective":[139],"steering":[140],"vectors:":[141],"applying":[142],"weight-space":[145],"perturbations":[146],"produces":[147],"large,":[148],"controllable":[149],"shifts":[150],"behavior":[153],"(e.g.,":[154],"bulleted-list":[155],"generation":[156],"33%":[157],"94%;":[159],"systematic":[160],"refusal":[161],"50%":[162],"0%).":[164],"The":[165],"requires":[167],"no":[168],"query--document":[169],"scoring":[170],"stage,":[171],"scales":[173],"independently":[174],"number":[177],"interest.":[182],"Code":[183],"is":[184],"available":[185],"at":[186],"https://github.com/jrosseruk/gradient_atoms.":[187]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
