{"id":"https://openalex.org/W7134818331","doi":"https://doi.org/10.48550/arxiv.2603.06950","title":"How Private Are DNA Embeddings? Inverting Foundation Model Representations of Genomic Sequences","display_name":"How Private Are DNA Embeddings? Inverting Foundation Model Representations of Genomic Sequences","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7134818331","doi":"https://doi.org/10.48550/arxiv.2603.06950"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.06950","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043856445","display_name":"Sofiane Ouaari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ouaari, Sofiane","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120730461","display_name":"Jules Kreuer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kreuer, Jules","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5076705834","display_name":"Nico Pfeifer","orcid":"https://orcid.org/0000-0002-4647-8566"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pfeifer, Nico","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.060100000351667404,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.060100000351667404,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13664","display_name":"Genome Rearrangement Algorithms","score":0.05350000038743019,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.049800001084804535,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4740999937057495},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4068000018596649},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.4065999984741211},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.37310001254081726},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.36980000138282776},{"id":"https://openalex.org/keywords/confidentiality","display_name":"Confidentiality","score":0.3127000033855438},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.3093999922275543},{"id":"https://openalex.org/keywords/nucleic-acid-sequence","display_name":"Nucleic acid sequence","score":0.2890999913215637}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.54830002784729},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4740999937057495},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4068000018596649},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.4065999984741211},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3970000147819519},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.37310001254081726},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.36980000138282776},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3497999906539917},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33489999175071716},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3249000012874603},{"id":"https://openalex.org/C71745522","wikidata":"https://www.wikidata.org/wiki/Q2476929","display_name":"Confidentiality","level":2,"score":0.3127000033855438},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C84148353","wikidata":"https://www.wikidata.org/wiki/Q863908","display_name":"Nucleic acid sequence","level":3,"score":0.2890999913215637},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.2849999964237213},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2822999954223633},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2736999988555908},{"id":"https://openalex.org/C189338579","wikidata":"https://www.wikidata.org/wiki/Q290447","display_name":"Ancient DNA","level":3,"score":0.2637999951839447},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.26350000500679016},{"id":"https://openalex.org/C117745874","wikidata":"https://www.wikidata.org/wiki/Q901612","display_name":"Sequence motif","level":3,"score":0.26179999113082886},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.25760000944137573},{"id":"https://openalex.org/C54985914","wikidata":"https://www.wikidata.org/wiki/Q1295754","display_name":"Consensus sequence","level":4,"score":0.25609999895095825},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.06950","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.06950","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.06950","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.06950","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"DNA":[0,81,108,127],"foundation":[1,82,128,223],"models":[2,18,83,224],"have":[3],"become":[4],"transformative":[5],"tools":[6],"in":[7,221,230],"bioinformatics":[8],"and":[9,133,170,202,237],"healthcare":[10],"applications.":[11],"Trained":[12],"on":[13],"vast":[14],"genomic":[15,32,222],"datasets,":[16],"these":[17,68],"can":[19],"be":[20,174],"used":[21],"to":[22,44,84,91,118,173,226],"generate":[23],"sequence":[24,109,146,158,203],"embeddings,":[25,153],"dense":[26],"vector":[27],"representations":[28,69],"that":[29,141,196],"capture":[30],"complex":[31],"information.":[33],"These":[34],"embeddings":[35,143],"are":[36,240],"increasingly":[37],"being":[38,71],"shared":[39],"via":[40],"Embeddings-as-a-Service":[41],"(EaaS)":[42],"frameworks":[43],"facilitate":[45],"downstream":[46],"tasks,":[47],"while":[48,186],"supposedly":[49],"protecting":[50],"the":[51,54,65,78,102,107,123,191,197,215],"privacy":[52,124],"of":[53,67,80,125,209],"underlying":[55],"raw":[56],"sequences.":[57],"However,":[58],"as":[59,157],"this":[60],"practice":[61],"becomes":[62],"more":[63],"prevalent,":[64],"security":[66],"is":[70,110,115],"called":[72],"into":[73],"question.":[74],"This":[75],"study":[76],"evaluates":[77],"resilience":[79],"model":[85,97,235],"inversion":[86],"attacks,":[87],"whereby":[88],"adversaries":[89],"attempt":[90],"reconstruct":[92],"sensitive":[93],"training":[94],"data":[95],"from":[96],"outputs.":[98],"In":[99],"our":[100],"study,":[101],"model's":[103],"output":[104],"for":[105,178,218],"reconstructing":[106],"a":[111,119,206],"zero-shot":[112],"embedding,":[113],"which":[114],"then":[116],"fed":[117],"decoder.":[120],"We":[121,194],"evaluated":[122],"three":[126],"models:":[129],"DNABERT-2,":[130],"Evo":[131,168],"2,":[132],"Nucleotide":[134],"Transformer":[135],"v2":[136],"(NTv2).":[137],"Our":[138,212],"results":[139],"show":[140],"per-token":[142],"allow":[144],"near-perfect":[145],"reconstruction":[147,154,182,210],"across":[148],"all":[149],"models.":[150],"For":[151],"mean-pooled":[152],"quality":[155],"degrades":[156],"length":[159],"increases,":[160],"though":[161],"it":[162],"remains":[163],"substantially":[164],"above":[165],"random":[166],"baselines.":[167],"2":[169],"NTv2":[171],"prove":[172],"most":[175],"vulnerable,":[176],"especially":[177],"shorter":[179],"sequences":[180],"with":[181],"similarities":[183],"&gt;":[184],"90%,":[185],"DNABERT-2's":[187],"BPE":[188],"tokenization":[189],"provides":[190],"greatest":[192],"resilience.":[193],"found":[195],"correlation":[198],"between":[199],"embedding":[200],"similarity":[201,204],"was":[205],"key":[207],"predictor":[208],"success.":[211],"findings":[213],"emphasize":[214],"urgent":[216],"need":[217],"privacy-aware":[219],"design":[220],"prior":[225],"their":[227],"widespread":[228],"deployment":[229],"EaaS":[231],"settings.":[232],"Training":[233],"code,":[234],"weights":[236],"evaluation":[238],"pipeline":[239],"released":[241],"on:":[242],"https://github.com/not-a-feature/DNA-Embedding-Inversion.":[243]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-11T00:00:00"}
