{"id":"https://openalex.org/W7159661641","doi":"https://doi.org/10.48550/arxiv.2604.28119","title":"Do Sparse Autoencoders Capture Concept Manifolds?","display_name":"Do Sparse Autoencoders Capture Concept Manifolds?","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7159661641","doi":"https://doi.org/10.48550/arxiv.2604.28119"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.28119","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28119","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.28119","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077350902","display_name":"Usha Bhalla","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhalla, Usha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134974531","display_name":"Thomas Fel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fel, Thomas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063383301","display_name":"Can Rager","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rager, Can","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134966905","display_name":"Sheridan Feucht","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feucht, Sheridan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092671619","display_name":"Tal Haklay","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haklay, Tal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092206144","display_name":"Daniel Wurgaft","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wurgaft, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128261556","display_name":"Siddharth Boppana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boppana, Siddharth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120318092","display_name":"Matthew Kowal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kowal, Matthew","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124812908","display_name":"Vasudev Shyam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shyam, Vasudev","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062612448","display_name":"Jack Merullo","orcid":"https://orcid.org/0009-0005-9673-6809"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Merullo, Jack","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002577142","display_name":"Atticus Geiger","orcid":"https://orcid.org/0000-0002-9170-506X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geiger, Atticus","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134997807","display_name":"Ekdeep Singh Lubana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lubana, Ekdeep Singh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.6973999738693237,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.6973999738693237,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1143999993801117,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.02630000002682209,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6215000152587891},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.61080002784729},{"id":"https://openalex.org/keywords/manifold","display_name":"Manifold (fluid mechanics)","score":0.5685999989509583},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47040000557899475},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4458000063896179},{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.43070000410079956},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4027000069618225},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.3743000030517578}],"concepts":[{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6215000152587891},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.61080002784729},{"id":"https://openalex.org/C529865628","wikidata":"https://www.wikidata.org/wiki/Q1790740","display_name":"Manifold (fluid mechanics)","level":2,"score":0.5685999989509583},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5430999994277954},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5346999764442444},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47040000557899475},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4458000063896179},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.43070000410079956},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4027000069618225},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3779999911785126},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.3743000030517578},{"id":"https://openalex.org/C2780728851","wikidata":"https://www.wikidata.org/wiki/Q468402","display_name":"Tile","level":2,"score":0.37049999833106995},{"id":"https://openalex.org/C12362212","wikidata":"https://www.wikidata.org/wiki/Q728435","display_name":"Linear subspace","level":2,"score":0.34220001101493835},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34049999713897705},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C151876577","wikidata":"https://www.wikidata.org/wiki/Q7049464","display_name":"Nonlinear dimensionality reduction","level":3,"score":0.33889999985694885},{"id":"https://openalex.org/C2778753569","wikidata":"https://www.wikidata.org/wiki/Q1960395","display_name":"Span (engineering)","level":2,"score":0.3255000114440918},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32190001010894775},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3091999888420105},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.3043000102043152},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.289900004863739},{"id":"https://openalex.org/C133226019","wikidata":"https://www.wikidata.org/wiki/Q209812","display_name":"Linear span","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.26010000705718994}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.28119","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28119","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.28119","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28119","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse":[0],"autoencoders":[1],"(SAEs)":[2],"are":[3,36],"widely":[4],"used":[5],"to":[6,22,58],"extract":[7],"interpretable":[8],"features":[9,113],"from":[10],"neural":[11],"network":[12],"representations,":[13],"often":[14],"under":[15],"the":[16,104,122,135,158,199],"implicit":[17],"assumption":[18],"that":[19,33,76,82,114,128,169,185],"concepts":[20,35,162],"correspond":[21],"independent":[23],"linear":[24,101],"directions.":[25,179],"However,":[26],"a":[27,60,73,95,118,143],"growing":[28],"body":[29],"of":[30,98,121,160,174,202],"evidence":[31],"suggests":[32],"many":[34],"instead":[37],"organized":[38],"along":[39],"low-dimensional":[40],"manifolds":[41,86],"encoding":[42],"continuous":[43,132],"geometric":[44,192],"relationships.":[45],"This":[46,149],"raises":[47],"three":[48],"basic":[49,200],"questions:":[50],"what":[51],"does":[52],"it":[53,111],"mean":[54],"for":[55,171],"an":[56],"SAE":[57,65],"capture":[59,85],"manifold,":[61,106],"when":[62],"do":[63,67],"existing":[64],"architectures":[66],"so,":[68],"and":[69,80,138,163],"how?":[70],"We":[71],"develop":[72],"theoretical":[74],"framework":[75],"answers":[77],"these":[78],"questions":[79],"show":[81],"SAEs":[83,129],"can":[84],"in":[87,142],"two":[88],"fundamentally":[89],"different":[90],"ways:":[91],"globally,":[92],"by":[93,109],"allocating":[94],"compact":[96],"group":[97],"atoms":[99,175],"whose":[100],"span":[102],"contains":[103],"entire":[105],"or":[107],"locally,":[108],"distributing":[110],"across":[112],"each":[115],"selectively":[116],"tile":[117],"restricted":[119],"region":[120],"underlying":[123],"geometry.":[124],"Empirically,":[125],"we":[126,146],"find":[127],"suboptimally":[130],"recover":[131],"structures,":[133],"mixing":[134],"global":[136],"subspace":[137],"local":[139],"tiling":[140],"solutions":[141],"fragmented":[144],"regime":[145],"call":[147],"dilution.":[148],"explains":[150],"why":[151],"manifold":[152],"structure":[153],"is":[154],"rarely":[155],"visible":[156],"at":[157],"level":[159],"individual":[161,196],"motivates":[164],"post-hoc":[165],"unsupervised":[166],"discovery":[167],"methods":[168,189],"search":[170],"coherent":[172],"groups":[173],"rather":[176],"than":[177],"isolated":[178],"More":[180],"broadly,":[181],"our":[182],"results":[183],"suggest":[184],"future":[186],"representation":[187],"learning":[188],"should":[190],"treat":[191],"objects,":[193],"not":[194],"just":[195],"directions,":[197],"as":[198],"units":[201],"interpretability.":[203]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-02T00:00:00"}
