{"id":"https://openalex.org/W4403851170","doi":"https://doi.org/10.48550/arxiv.2409.14507","title":"A is for Absorption: Studying Feature Splitting and Absorption in Sparse Autoencoders","display_name":"A is for Absorption: Studying Feature Splitting and Absorption in Sparse Autoencoders","publication_year":2024,"publication_date":"2024-09-22","ids":{"openalex":"https://openalex.org/W4403851170","doi":"https://doi.org/10.48550/arxiv.2409.14507"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2409.14507","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.14507","pdf_url":"https://arxiv.org/pdf/2409.14507","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2409.14507","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049886297","display_name":"D.I. Chanin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chanin, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114438805","display_name":"James Wilken-Smith","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wilken-Smith, James","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114438806","display_name":"Tom\u00e1\u0161 Dulka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dulka, Tom\u00e1\u0161","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114438807","display_name":"Hardik Bhatnagar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhatnagar, Hardik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023514744","display_name":"John R Bloom","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Golechha, Satvik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Bloom, Joseph","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bloom, Joseph","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5049886297"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9476000070571899,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9476000070571899,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/absorption","display_name":"Absorption (acoustics)","score":0.7214849591255188},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.7133064270019531},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.43206268548965454},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3716772198677063},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3550582826137543},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.3549875319004059},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.3534702658653259},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.2567085921764374},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.2088187038898468},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06048139929771423}],"concepts":[{"id":"https://openalex.org/C125287762","wikidata":"https://www.wikidata.org/wiki/Q1758948","display_name":"Absorption (acoustics)","level":2,"score":0.7214849591255188},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.7133064270019531},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43206268548965454},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3716772198677063},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3550582826137543},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.3549875319004059},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.3534702658653259},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.2567085921764374},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.2088187038898468},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06048139929771423},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2409.14507","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.14507","pdf_url":"https://arxiv.org/pdf/2409.14507","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2409.14507","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2409.14507","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2409.14507","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.14507","pdf_url":"https://arxiv.org/pdf/2409.14507","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403851170.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2016187641","https://openalex.org/W2805339068","https://openalex.org/W4246450666","https://openalex.org/W4388998267","https://openalex.org/W2898370298","https://openalex.org/W2137437058","https://openalex.org/W4390401159","https://openalex.org/W2033914206","https://openalex.org/W2042327336"],"abstract_inverted_index":{"Sparse":[0],"Autoencoders":[1],"(SAEs)":[2],"aim":[3],"to":[4,33,48,74,116,142,159],"decompose":[5],"the":[6,23,28,106,148,161],"activation":[7],"space":[8],"of":[9,25,60,128,150],"large":[10],"language":[11],"models":[12],"(LLMs)":[13],"into":[14,35,41,83],"human-interpretable":[15],"latent":[16],"directions":[17],"or":[18,138],"features.":[19,86],"As":[20],"we":[21,53,67],"increase":[22],"number":[24],"features":[26,31,37,62,72,108],"in":[27,103,119,153],"SAE,":[29],"hierarchical":[30,61],"tend":[32],"split":[34,40],"finer":[36],"(\"math\"":[38],"may":[39],"\"algebra\",":[42],"\"geometry\",":[43],"etc.),":[44],"a":[45,110,114],"phenomenon":[46,90],"referred":[47],"as":[49],"feature":[50,91,151],"splitting.":[51],"However,":[52],"show":[54,68,94],"that":[55,69,95,134],"sparse":[56],"decomposition":[57],"and":[58,79,93,121,155,174],"splitting":[59],"is":[63,97,140],"not":[64],"robust.":[65],"Specifically,":[66],"seemingly":[70],"monosemantic":[71],"fail":[73],"fire":[75],"where":[76],"they":[77],"should,":[78],"instead":[80],"get":[81],"\"absorbed\"":[82],"their":[84],"children":[85],"We":[87,112,146],"coin":[88],"this":[89,144],"absorption,":[92],"it":[96],"caused":[98],"by":[99],"optimizing":[100],"for":[101,170],"sparsity":[102,139],"SAEs":[104,154,166],"whenever":[105],"underlying":[107],"form":[109],"hierarchy.":[111],"introduce":[113],"metric":[115],"detect":[117],"absorption":[118,152],"SAEs,":[120],"validate":[122],"our":[123],"findings":[124],"empirically":[125],"on":[126],"hundreds":[127],"LLM":[129],"SAEs.":[130],"Our":[131],"investigation":[132],"suggests":[133],"varying":[135],"SAE":[136],"sizes":[137],"insufficient":[141],"solve":[143,160],"issue.":[145],"discuss":[147],"implications":[149],"some":[156],"potential":[157],"approaches":[158],"fundamental":[162],"theoretical":[163],"issues":[164],"before":[165],"can":[167],"be":[168],"used":[169],"interpreting":[171],"LLMs":[172],"robustly":[173],"at":[175],"scale.":[176]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
