{"id":"https://openalex.org/W7118809462","doi":"https://doi.org/10.48550/arxiv.2601.01089","title":"Central Dogma Transformer: Towards Mechanism-Oriented AI for Cellular Understanding","display_name":"Central Dogma Transformer: Towards Mechanism-Oriented AI for Cellular Understanding","publication_year":2026,"publication_date":"2026-01-03","ids":{"openalex":"https://openalex.org/W7118809462","doi":"https://doi.org/10.48550/arxiv.2601.01089"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.01089","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01089","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.01089","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122291895","display_name":"Nobuyuki Ota","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ota, Nobuyuki","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5122291895"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.36890000104904175,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.36890000104904175,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.1264999955892563,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.11159999668598175,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/enhancer","display_name":"Enhancer","score":0.5282999873161316},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.43959999084472656},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.42989999055862427},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.31299999356269836},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.29989999532699585}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5688999891281128},{"id":"https://openalex.org/C111936080","wikidata":"https://www.wikidata.org/wiki/Q913367","display_name":"Enhancer","level":4,"score":0.5282999873161316},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.4909000098705292},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44339999556541443},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.43959999084472656},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.42989999055862427},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.385699987411499},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.31299999356269836},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2858999967575073},{"id":"https://openalex.org/C152662350","wikidata":"https://www.wikidata.org/wiki/Q815297","display_name":"Systems biology","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.27160000801086426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.01089","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01089","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.01089","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01089","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Understanding":[0],"cellular":[1,44],"mechanisms":[2,78],"requires":[3],"integrating":[4],"information":[5,194],"across":[6],"DNA,":[7,62],"RNA,":[8,63],"and":[9,64,115,147,182,201],"protein":[10,65,116],"-":[11,79,91,107,118],"the":[12,18,49,67,71,136],"three":[13,101],"molecular":[14,22],"systems":[15],"linked":[16],"by":[17,140],"Central":[19,50,72],"Dogma":[20,51],"of":[21,70,131,135],"biology.":[23],"While":[24],"domain-specific":[25],"foundation":[26],"models":[27,60,82,88],"have":[28],"achieved":[29],"success":[30],"for":[31,61],"each":[32],"modality":[33],"individually,":[34],"they":[35],"remain":[36],"isolated,":[37],"limiting":[38],"our":[39],"ability":[40],"to":[41],"model":[42],"integrated":[43],"processes.":[45],"Here":[46],"we":[47],"present":[48],"Transformer":[52],"(CDT),":[53],"an":[54],"architecture":[55],"that":[56,98,173,188],"integrates":[57,99],"pre-trained":[58],"language":[59],"following":[66],"directional":[68,76],"logic":[69],"Dogma.":[73],"CDT":[74,105],"employs":[75],"cross-attention":[77],"DNA-to-RNA":[80],"attention":[81,87],"transcriptional":[83],"regulation,":[84],"while":[85],"RNA-to-Protein":[86],"translational":[89],"relationships":[90],"producing":[92],"a":[93,108,128,169],"unified":[94],"Virtual":[95],"Cell":[96],"Embedding":[97],"all":[100],"modalities.":[102],"We":[103],"validate":[104],"v1":[106],"proof-of-concept":[109],"implementation":[110],"using":[111],"fixed":[112],"(non-cell-specific)":[113],"RNA":[114],"embeddings":[117],"on":[119],"CRISPRi":[120],"enhancer":[121,181],"perturbation":[122],"data":[123,175],"from":[124],"K562":[125],"cells,":[126],"achieving":[127],"Pearson":[129],"correlation":[130],"0.503,":[132],"representing":[133],"63%":[134],"theoretical":[137],"ceiling":[138],"set":[139],"cross-experiment":[141],"variability":[142],"(r":[143],"=":[144],"0.797).":[145],"Attention":[146],"gradient":[148,166],"analyses":[149],"provide":[150],"complementary":[151],"interpretive":[152],"windows:":[153],"in":[154],"detailed":[155],"case":[156],"studies,":[157],"these":[158],"approaches":[159],"highlight":[160],"largely":[161],"distinct":[162],"genomic":[163],"regions,":[164],"with":[165,192],"analysis":[167],"identifying":[168],"CTCF":[170],"binding":[171],"site":[172],"Hi-C":[174],"showed":[176],"as":[177],"physically":[178],"contacting":[179],"both":[180,198],"target":[183],"gene.":[184],"These":[185],"results":[186],"suggest":[187],"AI":[189],"architectures":[190],"aligned":[191],"biological":[193],"flow":[195],"can":[196],"achieve":[197],"predictive":[199],"accuracy":[200],"mechanistic":[202],"interpretability.":[203]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
