{"id":"https://openalex.org/W7106690494","doi":"https://doi.org/10.48550/arxiv.2511.17735","title":"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders","display_name":"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders","publication_year":2025,"publication_date":"2025-11-21","ids":{"openalex":"https://openalex.org/W7106690494","doi":"https://doi.org/10.48550/arxiv.2511.17735"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2511.17735","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.17735","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2511.17735","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Stevens, Samuel","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Stevens, Samuel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Beattie, Jacob","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beattie, Jacob","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Berger-Wolf, Tanya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Berger-Wolf, Tanya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Su, Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Yu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.22450000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.22450000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09809999912977219,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.08070000261068344,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5860999822616577},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.541700005531311},{"id":"https://openalex.org/keywords/scientific-discovery","display_name":"Scientific discovery","score":0.5127999782562256},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.478300005197525},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.445499986410141},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.3921999931335449},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.33410000801086426}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6808000206947327},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5860999822616577},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5457000136375427},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.541700005531311},{"id":"https://openalex.org/C2984917352","wikidata":"https://www.wikidata.org/wiki/Q12772819","display_name":"Scientific discovery","level":2,"score":0.5127999782562256},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.478300005197525},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.445499986410141},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.39629998803138733},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.3921999931335449},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.33410000801086426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31839999556541443},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.29510000348091125},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C138379479","wikidata":"https://www.wikidata.org/wiki/Q1116876","display_name":"Scientific modelling","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C90329073","wikidata":"https://www.wikidata.org/wiki/Q914232","display_name":"Ask price","level":2,"score":0.2567000091075897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2511.17735","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.17735","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2511.17735","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.17735","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Climate action","id":"https://metadata.un.org/sdg/13","score":0.8707082867622375}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scientific":[0],"archives":[1],"now":[2],"contain":[3],"hundreds":[4],"of":[5,7,37,73],"petabytes":[6],"data":[8],"across":[9],"genomics,":[10,175],"ecology,":[11],"climate,":[12],"and":[13,31,47,116,166],"molecular":[14],"biology":[15],"that":[16,180],"could":[17],"reveal":[18],"undiscovered":[19],"patterns":[20],"if":[21],"systematically":[22],"analyzed":[23],"at":[24,65],"scale.":[25],"Large-scale,":[26],"weakly-supervised":[27],"datasets":[28],"in":[29,95,170],"language":[30],"vision":[32,156],"have":[33,193],"driven":[34],"the":[35,100,129,162],"development":[36],"foundation":[38,88,191],"models":[39,169,192],"whose":[40],"internal":[41],"representations":[42],"encode":[43],"structure":[44,58,135],"(patterns,":[45],"co-occurrences":[46],"statistical":[48],"regularities)":[49],"beyond":[50],"their":[51],"training":[52],"objectives.":[53],"Most":[54],"existing":[55],"methods":[56],"extract":[57],"only":[59],"for":[60,106,187,198],"pre-specified":[61],"targets;":[62],"they":[63],"excel":[64],"confirmation":[66,201],"but":[67],"do":[68],"not":[69],"support":[70],"open-ended":[71,84],"discovery":[72,86],"unknown":[74],"patterns.":[75],"We":[76,91],"ask":[77],"whether":[78],"sparse":[79,181],"autoencoders":[80],"(SAEs)":[81],"can":[82],"enable":[83],"feature":[85],"from":[87,200],"model":[89],"representations.":[90],"evaluate":[92],"this":[93],"question":[94],"controlled":[96],"rediscovery":[97],"studies,":[98],"where":[99],"learned":[101],"SAE":[102],"features":[103],"are":[104],"tested":[105],"alignment":[107],"with":[108,148,157],"semantic":[109],"concepts":[110],"on":[111,122,155],"a":[112,144,184],"standard":[113],"segmentation":[114,139],"benchmark":[115],"compared":[117],"against":[118],"strong":[119],"label-free":[120],"alternatives":[121],"concept-alignment":[123],"metrics.":[124],"Applied":[125],"to":[126,138,168,202],"ecological":[127],"imagery,":[128],"same":[130],"procedure":[131],"surfaces":[132],"fine-grained":[133],"anatomical":[134],"without":[136],"access":[137],"or":[140],"part":[141],"labels,":[142],"providing":[143],"scientific":[145,190],"case":[146,160],"study":[147],"ground-truth":[149],"validation.":[150],"While":[151],"our":[152],"experiments":[153],"focus":[154],"an":[158,195],"ecology":[159],"study,":[161],"method":[163],"is":[164],"domain-agnostic":[165],"applicable":[167],"other":[171],"sciences":[172],"(e.g.,":[173],"proteins,":[174],"weather).":[176],"Our":[177],"results":[178],"indicate":[179],"decomposition":[182],"provides":[183],"practical":[185],"instrument":[186],"exploring":[188],"what":[189],"learned,":[194],"important":[196],"prerequisite":[197],"moving":[199],"genuine":[203],"discovery.":[204]},"counts_by_year":[],"updated_date":"2025-11-27T01:16:37.896743","created_date":"2025-11-27T00:00:00"}
