{"id":"https://openalex.org/W4400717805","doi":"https://doi.org/10.48550/arxiv.2407.10785","title":"Learning biologically relevant features in a pathology foundation model using sparse autoencoders","display_name":"Learning biologically relevant features in a pathology foundation model using sparse autoencoders","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4400717805","doi":"https://doi.org/10.48550/arxiv.2407.10785"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2407.10785","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.10785","pdf_url":"https://arxiv.org/pdf/2407.10785","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.10785","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101521403","display_name":"Nhat Le","orcid":"https://orcid.org/0009-0009-8193-0288"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Le, Nhat Minh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065233202","display_name":"Ciyue Shen","orcid":"https://orcid.org/0000-0002-5416-2481"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Ciyue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Patel, Neel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patel, Neel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028315645","display_name":"Chintan Shah","orcid":"https://orcid.org/0000-0002-2441-908X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shah, Chintan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sanghavi, Darpan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sanghavi, Darpan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037342300","display_name":"Blake Martin","orcid":"https://orcid.org/0000-0001-5683-8310"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Martin, Blake","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Eng, Alfred","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eng, Alfred","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008540053","display_name":"Daniel Shenker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shenker, Daniel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063093531","display_name":"Harshith Padigela","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Padigela, Harshith","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Biju, Raymond","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Biju, Raymond","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Javed, Syed Ashar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Javed, Syed Ashar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103964529","display_name":"Jennifer Hipp","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hipp, Jennifer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009816989","display_name":"John H. Abel","orcid":"https://orcid.org/0000-0002-8654-9226"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abel, John","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086365447","display_name":"Harsha Pokkalla","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pokkalla, Harsha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113668152","display_name":"S. Grullon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grullon, Sean","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5019438937","display_name":"Dinkar Juyal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juyal, Dinkar","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":["https://openalex.org/A5101521403"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.3246000111103058,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.3246000111103058,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8899483680725098},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.7567530870437622},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.7222391963005066},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.40215203166007996},{"id":"https://openalex.org/keywords/cognitive-science","display_name":"Cognitive science","score":0.3707365393638611},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.33681657910346985},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3278486132621765},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.3219611346721649},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.32002031803131104},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2909214496612549},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.24863946437835693},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.16660559177398682},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.13063442707061768},{"id":"https://openalex.org/keywords/archaeology","display_name":"Archaeology","score":0.08474603295326233},{"id":"https://openalex.org/keywords/social-science","display_name":"Social science","score":0.0835081934928894}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8899483680725098},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.7567530870437622},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.7222391963005066},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40215203166007996},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.3707365393638611},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.33681657910346985},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3278486132621765},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3219611346721649},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32002031803131104},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2909214496612549},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.24863946437835693},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.16660559177398682},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.13063442707061768},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.08474603295326233},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0835081934928894}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2407.10785","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.10785","pdf_url":"https://arxiv.org/pdf/2407.10785","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2407.10785","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.10785","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.10785","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.10785","pdf_url":"https://arxiv.org/pdf/2407.10785","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W2888392564","https://openalex.org/W4310278675","https://openalex.org/W4388422664","https://openalex.org/W4390569940","https://openalex.org/W4361193272","https://openalex.org/W2963326959","https://openalex.org/W4388685194","https://openalex.org/W4312407344","https://openalex.org/W2894289927"],"abstract_inverted_index":{"Pathology":[0],"plays":[1],"an":[2,43,180],"important":[3],"role":[4],"in":[5,64,135],"disease":[6],"diagnosis,":[7],"treatment":[8],"decision-making":[9],"and":[10,33,99,119,131,155,195,200],"drug":[11],"development.":[12],"Previous":[13],"works":[14],"on":[15,21,51,82,140],"interpretability":[16,41,48],"for":[17,188,198],"machine":[18],"learning":[19],"models":[20],"pathology":[22,87,128,157],"images":[23],"have":[24,58],"revolved":[25],"around":[26,191],"methods":[27],"such":[28,115,146,166],"as":[29,60,116,167],"attention":[30],"value":[31],"visualization":[32],"deriving":[34],"human-interpretable":[35],"features":[36,69,96,176],"from":[37,70],"model":[38,47,72,130,138,159],"heatmaps.":[39],"Mechanistic":[40],"is":[42],"emerging":[44],"area":[45],"of":[46,66,85,172],"that":[49,93,145],"focuses":[50],"reverse-engineering":[52],"neural":[53],"networks.":[54],"Sparse":[55,80,94],"Autoencoders":[56],"(SAEs)":[57],"emerged":[59],"a":[61,79,86,136],"promising":[62],"direction":[63],"terms":[65],"extracting":[67],"monosemantic":[68,100,148],"polysemantic":[71],"activations.":[73],"In":[74,103],"this":[75],"work,":[76],"we":[77],"trained":[78],"Autoencoder":[81,95],"the":[83,127,152,156,186],"embeddings":[84],"pretrained":[88,129,139],"foundation":[89,158],"model.":[90],"We":[91,143],"found":[92,134],"represent":[97],"interpretable":[98,192],"biological":[101,122],"concepts.":[102],"particular,":[104],"individual":[105],"SAE":[106,175],"dimensions":[107,194],"showed":[108],"strong":[109],"correlations":[110],"with":[111],"cell":[112],"type":[113],"counts":[114],"plasma":[117],"cells":[118],"lymphocytes.":[120],"These":[121],"representations":[123,149],"were":[124,132],"unique":[125],"to":[126,163,179],"not":[133],"self-supervised":[137],"natural":[141],"images.":[142],"demonstrated":[144],"biologically-grounded":[147],"evolved":[150],"across":[151],"model's":[153],"depth,":[154],"eventually":[160],"gained":[161],"robustness":[162],"non-biological":[164],"factors":[165],"scanner":[168],"type.":[169],"The":[170],"emergence":[171],"biologically":[173],"relevant":[174],"was":[177],"generalizable":[178],"out-of-domain":[181],"dataset.":[182],"Our":[183],"work":[184],"paves":[185],"way":[187],"further":[189],"exploration":[190],"feature":[193],"their":[196],"utility":[197],"medical":[199],"clinical":[201],"applications.":[202]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2024-07-17T00:00:00"}
