{"id":"https://openalex.org/W4415085995","doi":"https://doi.org/10.48550/arxiv.2503.01822","title":"Projecting Assumptions: The Duality Between Sparse Autoencoders and Concept Geometry","display_name":"Projecting Assumptions: The Duality Between Sparse Autoencoders and Concept Geometry","publication_year":2025,"publication_date":"2025-03-03","ids":{"openalex":"https://openalex.org/W4415085995","doi":"https://doi.org/10.48550/arxiv.2503.01822"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2503.01822","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.01822","pdf_url":"https://arxiv.org/pdf/2503.01822","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2503.01822","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093262518","display_name":"Sai Sumedh R. Hindupur","orcid":"https://orcid.org/0000-0002-8772-8488"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hindupur, Sai Sumedh R.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069090559","display_name":"Ekdeep Singh Lubana","orcid":"https://orcid.org/0000-0002-7200-9341"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lubana, Ekdeep Singh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038564554","display_name":"Thomas Fel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fel, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5002643771","display_name":"Demba Ba","orcid":"https://orcid.org/0000-0002-1139-1030"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ba, Demba","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5093262518"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.7458999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.7458999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.536899983882904},{"id":"https://openalex.org/keywords/duality","display_name":"Duality (order theory)","score":0.49939998984336853},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.43630000948905945},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.361299991607666},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.35910001397132874},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.34060001373291016},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.32850000262260437}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5907999873161316},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.536899983882904},{"id":"https://openalex.org/C2778023678","wikidata":"https://www.wikidata.org/wiki/Q554403","display_name":"Duality (order theory)","level":2,"score":0.49939998984336853},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48330000042915344},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.43630000948905945},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.37770000100135803},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.361299991607666},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3206000030040741},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.29350000619888306},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C156778621","wikidata":"https://www.wikidata.org/wiki/Q1365748","display_name":"Spectrum (functional analysis)","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2581999897956848},{"id":"https://openalex.org/C136119220","wikidata":"https://www.wikidata.org/wiki/Q1000660","display_name":"Algebra over a field","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2540999948978424}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2503.01822","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.01822","pdf_url":"https://arxiv.org/pdf/2503.01822","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2503.01822","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.01822","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2503.01822","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.01822","pdf_url":"https://arxiv.org/pdf/2503.01822","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse":[0],"Autoencoders":[1],"(SAEs)":[2],"are":[3,29,65,84,152,156,172],"widely":[4],"used":[5],"to":[6,48,120,129,166],"interpret":[7],"neural":[8],"networks":[9],"by":[10],"identifying":[11],"meaningful":[12],"concepts":[13,23,64,94,143,151,168,190,224],"from":[14,112],"their":[15],"representations.":[16],"However,":[17],"do":[18],"SAEs":[19,45,83,106,164],"truly":[20],"uncover":[21],"all":[22],"a":[24,40,49,54,108,177,202],"model":[25,68,125,213],"relies":[26],"on,":[27],"or":[28,95],"they":[30],"inherently":[31,153],"biased":[32],"toward":[33],"certain":[34],"kinds":[35],"of":[36,110,187,201],"concepts?":[37],"We":[38,161],"introduce":[39],"unified":[41],"framework":[42],"that":[43,116,141,163,180],"recasts":[44],"as":[46],"solutions":[47],"bilevel":[50],"optimization":[51],"problem,":[52],"revealing":[53],"fundamental":[55,139],"challenge:":[56],"each":[57],"SAE":[58,179,204,219],"imposes":[59],"structural":[60],"assumptions":[61],"about":[62],"how":[63],"encoded":[66],"in":[67,71,147,212],"representations,":[69],"which":[70],"turn":[72],"shapes":[73],"what":[74,228],"it":[75,226],"can":[76,90,229],"and":[77,127,158,174,191,205],"cannot":[78],"detect.":[79],"This":[80],"means":[81],"different":[82],"not":[85,221],"interchangeable":[86],"--":[87,225],"switching":[88],"architectures":[89],"expose":[91],"entirely":[92],"new":[93,178],"obscure":[96],"existing":[97],"ones.":[98],"To":[99],"systematically":[100],"probe":[101],"this":[102,134],"effect,":[103],"we":[104,136,175,216],"evaluate":[105],"across":[107],"spectrum":[109],"settings:":[111],"controlled":[113],"toy":[114],"models":[115],"isolate":[117],"key":[118],"variables,":[119],"semi-synthetic":[121],"experiments":[122],"on":[123],"real":[124],"activations":[126],"finally":[128],"large-scale,":[130],"naturalistic":[131],"datasets.":[132],"Across":[133],"progression,":[135],"examine":[137],"two":[138],"properties":[140,171],"real-world":[142],"often":[144],"exhibit:":[145],"heterogeneity":[146],"intrinsic":[148],"dimensionality":[149],"(some":[150],"low-dimensional,":[154],"others":[155],"not)":[157],"nonlinear":[159],"separability.":[160],"show":[162],"fail":[165],"recover":[167],"when":[169],"these":[170],"ignored,":[173],"design":[176],"explicitly":[181],"incorporates":[182],"both,":[183],"enabling":[184],"the":[185,199,207],"discovery":[186],"previously":[188],"hidden":[189],"reinforcing":[192],"our":[193],"theoretical":[194],"insights.":[195],"Our":[196],"findings":[197],"challenge":[198],"idea":[200],"universal":[203],"underscores":[206],"need":[208],"for":[209],"architecture-specific":[210],"choices":[211],"interpretability.":[214],"Overall,":[215],"argue":[217],"an":[218],"does":[220],"just":[222],"reveal":[223],"determines":[227],"be":[230],"seen":[231],"at":[232],"all.":[233]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-12T00:00:00"}
