{"id":"https://openalex.org/W7162678669","doi":"https://doi.org/10.48550/arxiv.2605.27813","title":"Residualized Temporal Sparse Autoencoders for Interpreting Diffusion Models","display_name":"Residualized Temporal Sparse Autoencoders for Interpreting Diffusion Models","publication_year":2026,"publication_date":"2026-05-27","ids":{"openalex":"https://openalex.org/W7162678669","doi":"https://doi.org/10.48550/arxiv.2605.27813"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.27813","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27813","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.27813","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034093897","display_name":"Calvin Yeung","orcid":"https://orcid.org/0009-0008-3326-8931"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yeung, Calvin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137205373","display_name":"Prathyush Poduval","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Poduval, Prathyush","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125666562","display_name":"Ali Zakeri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zakeri, Ali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035947458","display_name":"Zhuowen Zou","orcid":"https://orcid.org/0000-0001-9057-8815"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Zhuowen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137247124","display_name":"Mohsen Imani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Imani, Mohsen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11304","display_name":"Advanced Neuroimaging Techniques and Applications","score":0.866599977016449,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11304","display_name":"Advanced Neuroimaging Techniques and Applications","score":0.866599977016449,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0272000003606081,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10241","display_name":"Functional Brain Connectivity Studies","score":0.023499999195337296,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6789000034332275},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.6366000175476074},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5967000126838684},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.5583000183105469},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5281000137329102},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.3871999979019165},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.36980000138282776}],"concepts":[{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6789000034332275},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.6366000175476074},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6240000128746033},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6078000068664551},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5967000126838684},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.5583000183105469},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5281000137329102},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.3871999979019165},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.36980000138282776},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.3456000089645386},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.33410000801086426},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32010000944137573},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.3027999997138977},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2919999957084656},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.27813","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27813","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.27813","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27813","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-to-image":[0],"diffusion":[1,32,67,171],"models":[2],"generate":[3],"images":[4],"through":[5],"an":[6,87,101],"iterative":[7],"denoising":[8,74,140],"process,":[9],"so":[10],"internal":[11],"neural":[12],"layers":[13],"produce":[14],"trajectories":[15],"of":[16],"activations":[17,33,42,72],"rather":[18,50],"than":[19,51],"single":[20],"static":[21],"representations.":[22],"Sparse":[23],"autoencoders":[24],"(SAEs)":[25],"have":[26],"recently":[27],"been":[28],"used":[29],"to":[30,110,132],"decompose":[31],"into":[34,126],"interpretable":[35],"feature":[36,137,148],"directions,":[37],"but":[38],"most":[39],"approaches":[40],"analyze":[41],"at":[43],"individual":[44],"timesteps":[45],"or":[46],"condition":[47],"on":[48,103,154],"time":[49],"learning":[52],"directly":[53],"from":[54],"full":[55],"activation":[56,68,89,127],"trajectories.":[57,69],"In":[58],"this":[59,104],"work,":[60],"we":[61,157],"introduce":[62],"residualized":[63,105,119,160],"temporal":[64,161],"SAEs":[65,162],"for":[66,167],"We":[70],"collect":[71],"across":[73],"time,":[75],"fit":[76],"linear":[77,98],"predictors":[78],"between":[79],"neighboring":[80],"timesteps,":[81],"and":[82,144,150],"represent":[83],"each":[84,130],"trajectory":[85,138],"using":[86],"initial":[88],"together":[90],"with":[91],"residual":[92],"components":[93],"not":[94],"explained":[95],"by":[96],"these":[97],"dynamics.":[99],"Training":[100],"SAE":[102],"representation":[106],"encourages":[107],"sparse":[108],"latents":[109],"capture":[111],"structure":[112],"beyond":[113],"what":[114],"is":[115],"linearly":[116],"predictable.":[117],"The":[118],"decoder":[120],"directions":[121],"can":[122],"be":[123,133],"mapped":[124],"back":[125],"space,":[128],"allowing":[129],"latent":[131],"analyzed":[134],"as":[135],"a":[136,164],"over":[139],"time.":[141],"Through":[142],"reconstruction":[143],"ablation":[145],"studies,":[146],"spatiotemporal":[147],"analysis,":[149],"qualitative":[151],"steering":[152],"experiments":[153],"Stable":[155],"Diffusion~1.5,":[156],"show":[158],"that":[159],"provide":[163],"useful":[165],"framework":[166],"studying":[168],"temporally":[169],"structured":[170],"activations.":[172]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-29T00:00:00"}
