{"id":"https://openalex.org/W4416035733","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.1345","title":"Decoding Dense Embeddings: Sparse Autoencoders for Interpreting and Discretizing Dense Retrieval","display_name":"Decoding Dense Embeddings: Sparse Autoencoders for Interpreting and Discretizing Dense Retrieval","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416035733","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.1345"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.1345","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1345","pdf_url":"https://aclanthology.org/2025.emnlp-main.1345.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.1345.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035580019","display_name":"Seong-Wan Park","orcid":"https://orcid.org/0000-0001-9612-0020"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Seongwan Park","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030202737","display_name":"Taek-Lim Kim","orcid":"https://orcid.org/0000-0002-8001-4212"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taeklim Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5008710152","display_name":"Youngjoong Ko","orcid":"https://orcid.org/0000-0002-0241-9193"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Youngjoong Ko","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035580019"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1744365,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"26479","last_page":"26496"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3066999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3066999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.19689999520778656,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.06260000169277191,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5335999727249146},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5002999901771545},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.3303000032901764},{"id":"https://openalex.org/keywords/discretization","display_name":"Discretization","score":0.31929999589920044},{"id":"https://openalex.org/keywords/noisy-data","display_name":"Noisy data","score":0.31220000982284546},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.3059999942779541}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6413000226020813},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.63919997215271},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5335999727249146},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5002999901771545},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3303000032901764},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.328000009059906},{"id":"https://openalex.org/C73000952","wikidata":"https://www.wikidata.org/wiki/Q17007827","display_name":"Discretization","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.31369999051094055},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.3059999942779541},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2897999882698059},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2606000006198883}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.1345","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1345","pdf_url":"https://aclanthology.org/2025.emnlp-main.1345.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.1345","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1345","pdf_url":"https://aclanthology.org/2025.emnlp-main.1345.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1610820195","display_name":null,"funder_award_id":"RS-2019-II190421","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G1630947553","display_name":null,"funder_award_id":"RS-2022-II220369","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G3034753964","display_name":null,"funder_award_id":"grant","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"},{"id":"https://openalex.org/G342704958","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G3996153573","display_name":null,"funder_award_id":"RS-2022-II220369","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G4377996611","display_name":null,"funder_award_id":"RS-2019-II190421","funder_id":"https://openalex.org/F4320321378","funder_display_name":"Sungkyunkwan University"},{"id":"https://openalex.org/G438840348","display_name":null,"funder_award_id":"RS-2019-II190421","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G4700831490","display_name":null,"funder_award_id":"2022-","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G6292470738","display_name":null,"funder_award_id":"2022-0-00369","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G6580753045","display_name":null,"funder_award_id":"RS-2020-II201821","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G7248435635","display_name":null,"funder_award_id":"RS-2024-00350379","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G7537039438","display_name":null,"funder_award_id":"2022-0-00369","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G7988537878","display_name":null,"funder_award_id":"RS-2020-II201821","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320321378","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416035733.pdf","grobid_xml":"https://content.openalex.org/works/W4416035733.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"their":[1],"strong":[2],"performance,":[3],"Dense":[4],"Passage":[5],"Retrieval":[6,70],"(DPR)":[7],"models":[8,36],"suffer":[9],"from":[10,34],"a":[11,19,72],"lack":[12],"of":[13,53,63,90,98],"interpretability.In":[14],"this":[15],"work,":[16],"we":[17],"propose":[18],"novel":[20],"interpretability":[21],"framework":[22,74],"that":[23,75,102],"leverages":[24],"Sparse":[25,69],"Autoencoders":[26],"(SAEs)":[27],"to":[28],"decompose":[29],"previously":[30],"uninterpretable":[31],"dense":[32,56,91],"embeddings":[33,57,92],"DPR":[35,64],"into":[37],"distinct,":[38],"interpretable":[39],"latent":[40,48,80],"concepts.We":[41],"generate":[42],"natural":[43],"language":[44],"descriptions":[45],"for":[46],"each":[47],"concept,":[49],"enabling":[50],"human":[51],"interpretations":[52],"both":[54],"the":[55,59,78,87,94],"and":[58,96,107,116],"query-document":[60],"similarity":[61],"scores":[62],"models.We":[65],"further":[66],"introduce":[67],"Concept-Level":[68],"(CL-SR),":[71],"retrieval":[73],"directly":[76],"utilizes":[77],"extracted":[79],"concepts":[81],"as":[82],"indexing":[83],"units.CL-SR":[84],"effectively":[85],"combines":[86],"semantic":[88,117],"expressiveness":[89],"with":[93],"transparency":[95],"efficiency":[97,109],"sparse":[99],"representations.We":[100],"show":[101],"CL-SR":[103],"achieves":[104],"high":[105],"index-space":[106],"computational":[108],"while":[110],"maintaining":[111],"robust":[112],"performance":[113],"across":[114],"vocabulary":[115],"mismatches":[118],"1":[119],".":[120]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-11-08T00:00:00"}
