{"id":"https://openalex.org/W7157154286","doi":"https://doi.org/10.48550/arxiv.2604.24506","title":"MIMIC: A Generative Multimodal Foundation Model for Biomolecules","display_name":"MIMIC: A Generative Multimodal Foundation Model for Biomolecules","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7157154286","doi":"https://doi.org/10.48550/arxiv.2604.24506"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24506","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24506","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24506","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020401988","display_name":"Siavash Golkar","orcid":"https://orcid.org/0000-0002-1867-2054"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Golkar, Siavash","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116249301","display_name":"Jake Kovalic","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kovalic, Jake","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033259596","display_name":"I. Morales","orcid":"https://orcid.org/0000-0002-7467-3596"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Morales, Irina Espejo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020346838","display_name":"Samuel Sledzieski","orcid":"https://orcid.org/0000-0002-0170-3029"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sledzieski, Samuel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134817655","display_name":"Minhuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Minhuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062645212","display_name":"Ksenia Sokolova","orcid":"https://orcid.org/0000-0001-7499-5387"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sokolova, Ksenia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134756800","display_name":"Geraud Krawezik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krawezik, Geraud","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134753923","display_name":"Alberto Bietti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bietti, Alberto","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038623820","display_name":"Claudia Skok Gibbs","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gibbs, Claudia Skok","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119709693","display_name":"Roman Klypa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Klypa, Roman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085155102","display_name":"Shengwei Xiong","orcid":"https://orcid.org/0009-0002-3207-1113"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Shengwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134775888","display_name":"Francois Lanusse","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lanusse, Francois","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042321060","display_name":"Liam Parker","orcid":"https://orcid.org/0009-0007-4952-1674"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Parker, Liam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091175785","display_name":"Kyunghyun Cho","orcid":"https://orcid.org/0000-0003-1669-3211"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cho, Kyunghyun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078731429","display_name":"Miles Cranmer","orcid":"https://orcid.org/0000-0002-6458-3423"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cranmer, Miles","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115012461","display_name":"Tom Hehir","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hehir, Tom","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134772677","display_name":"Michael McCabe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McCabe, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134813697","display_name":"Lucas Meyer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meyer, Lucas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134777705","display_name":"Rudy Morel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Morel, Rudy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134800404","display_name":"Payel Mukhopadhyay","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mukhopadhyay, Payel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134805458","display_name":"Mariel Pettee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pettee, Mariel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042859670","display_name":"Helen Qu","orcid":"https://orcid.org/0000-0003-1899-9791"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Helen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134795508","display_name":"Jeff Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Jeff","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134817267","display_name":"David Fouhey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fouhey, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063988493","display_name":"Hadi Sotoudeh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sotoudeh, Hadi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134800222","display_name":"Vikram Mulligan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mulligan, Vikram","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134794559","display_name":"Pilar Cossio","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cossio, Pilar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041965787","display_name":"Sonya M. Hanson","orcid":"https://orcid.org/0000-0001-8960-5353"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hanson, Sonya M.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134766610","display_name":"Alisha N. Jones","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jones, Alisha N.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025250742","display_name":"Olga G. Troyanskaya","orcid":"https://orcid.org/0000-0002-5676-5737"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Troyanskaya, Olga G.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5049051097","display_name":"Shirley Ho","orcid":"https://orcid.org/0000-0002-1068-160X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ho, Shirley","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.6818000078201294,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.6818000078201294,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10604","display_name":"RNA Research and Splicing","score":0.07940000295639038,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.06520000100135803,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6328999996185303},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5694000124931335},{"id":"https://openalex.org/keywords/synthetic-biology","display_name":"Synthetic biology","score":0.5209000110626221},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5091000199317932},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4341999888420105},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4327999949455261},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.40450000762939453},{"id":"https://openalex.org/keywords/generative-design","display_name":"Generative Design","score":0.3912000060081482}],"concepts":[{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6328999996185303},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6270999908447266},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6241000294685364},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5694000124931335},{"id":"https://openalex.org/C191908910","wikidata":"https://www.wikidata.org/wiki/Q862838","display_name":"Synthetic biology","level":2,"score":0.5209000110626221},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5091000199317932},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4341999888420105},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4327999949455261},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.40450000762939453},{"id":"https://openalex.org/C184408114","wikidata":"https://www.wikidata.org/wiki/Q1502022","display_name":"Generative Design","level":3,"score":0.3912000060081482},{"id":"https://openalex.org/C2776303644","wikidata":"https://www.wikidata.org/wiki/Q1020499","display_name":"Interfacing","level":2,"score":0.38510000705718994},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.36239999532699585},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.352400004863739},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30979999899864197},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.2964000105857849},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C66782513","wikidata":"https://www.wikidata.org/wiki/Q864601","display_name":"Biomedicine","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.26570001244544983},{"id":"https://openalex.org/C54458228","wikidata":"https://www.wikidata.org/wiki/Q237218","display_name":"RNA splicing","level":4,"score":0.260699987411499},{"id":"https://openalex.org/C2775905019","wikidata":"https://www.wikidata.org/wiki/Q192572","display_name":"In silico","level":3,"score":0.2597000002861023},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.25690001249313354}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24506","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24506","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24506","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24506","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Biological":[0],"function":[1],"emerges":[2],"from":[3],"coupled":[4],"constraints":[5],"across":[6,86],"sequence,":[7],"structure,":[8],"regulation,":[9],"evolution,":[10],"and":[11,44,55,77,90,112,121,161,170,175,232],"cellular":[12],"context,":[13],"yet":[14],"most":[15],"foundation":[16,37,225],"models":[17],"in":[18,148,185],"biology":[19],"are":[20],"trained":[21,39],"within":[22,58,236],"one":[23],"modality":[24],"or":[25,79],"for":[26,188,226],"a":[27,34,65,149,210,223,237],"fixed":[28,211],"forward":[29],"task.":[30],"We":[31],"present":[32],"MIMIC,":[33],"generative":[35,124,137,220],"multimodal":[36,219],"model":[38,200],"on":[40,71,110,168],"our":[41],"newly":[42],"curated":[43],"aligned":[45,218],"dataset,":[46],"LORE,":[47],"linking":[48],"nucleic":[49],"acid,":[50],"protein,":[51],"evolutionary,":[52],"structural,":[53],"regulatory,":[54],"semantic/contextual":[56],"modalities":[57,76],"partially":[59],"observed":[60,75],"biomolecular":[61,234],"states.":[62],"MIMIC":[63,116,144,192],"uses":[64,193],"split-track":[66],"encoder-decoder":[67],"architecture":[68],"to":[69,100,199],"condition":[70],"arbitrary":[72],"subsets":[73],"of":[74,83,173],"reconstruct":[78],"generate":[80],"missing":[81],"components":[82],"molecular":[84],"state":[85],"the":[87,135],"genome,":[88],"transcriptome,":[89],"proteome.":[91],"Multimodal":[92],"conditioning":[93,167,198],"consistently":[94],"improves":[95,131],"MIMIC's":[96,217],"sequence":[97],"reconstruction":[98],"relative":[99],"sequence-only":[101],"inputs,":[102],"while":[103],"its":[104,122],"learned":[105],"representations":[106],"enable":[107],"state-of-the-art":[108,118],"performance":[109],"RNA":[111,202],"protein":[113],"downstream":[114],"tasks.":[115],"achieves":[117],"splicing":[119],"prediction,":[120,134,231],"joint":[123],"formulation":[125],"enables":[126],"isoform-aware":[127],"inference":[128],"that":[129],"further":[130],"performance.":[132],"Beyond":[133],"same":[136],"framework":[138],"supports":[139],"constrained":[140,233],"design.":[141],"For":[142,164],"RNA,":[143],"identifies":[145],"corrective":[146],"edits":[147],"clinically":[150],"relevant":[151],"HBB":[152],"splice-disrupting":[153],"mutation":[154],"without":[155],"reverting":[156],"it":[157],"by":[158],"using":[159],"evolutionary":[160],"structural":[162],"signals.":[163],"proteins,":[165],"jointly":[166],"shape":[169],"surface":[171],"chemistry":[172],"PD-L1":[174],"hACE2":[176],"binding":[177],"sites":[178],"produces":[179],"diverse,":[180],"high-confidence":[181],"sequences":[182],"with":[183],"strong":[184,224],"silico":[186],"support":[187],"target":[189],"binding.":[190],"Finally,":[191],"experimental":[194],"context":[195,208],"as":[196,209,222],"semantic":[197],"assay-dependent":[201],"chemical":[203],"probing,":[204],"rather":[205],"than":[206],"treating":[207],"output.":[212],"Together,":[213],"these":[214],"results":[215],"position":[216],"modeling":[221],"unifying":[227],"representation":[228],"learning,":[229],"conditional":[230],"design":[235],"single":[238],"model.":[239]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-04-29T00:00:00"}
