{"id":"https://openalex.org/W7140795604","doi":"https://doi.org/10.48550/arxiv.2603.23583","title":"ZeroFold: Protein-RNA Binding Affinity Predictions from Pre-Structural Embeddings","display_name":"ZeroFold: Protein-RNA Binding Affinity Predictions from Pre-Structural Embeddings","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140795604","doi":"https://doi.org/10.48550/arxiv.2603.23583"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23583","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23583","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23583","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125670462","display_name":"Josef Hanke","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Hanke, Josef","raw_affiliation_strings":["Yusuf Hamied Department of Chemistry, University of Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Yusuf Hamied Department of Chemistry, University of Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108898226","display_name":"Sebastian Pujalte Ojeda","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ojeda, Sebastian Pujalte","raw_affiliation_strings":["Yusuf Hamied Department of Chemistry, University of Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Yusuf Hamied Department of Chemistry, University of Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054025089","display_name":"S. J. Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhang, Shengyu","raw_affiliation_strings":["Yusuf Hamied Department of Chemistry, University of Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Yusuf Hamied Department of Chemistry, University of Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080565832","display_name":"Werngard Czechtizky","orcid":"https://orcid.org/0000-0003-4487-2399"},"institutions":[{"id":"https://openalex.org/I4210113007","display_name":"AstraZeneca (Netherlands)","ror":"https://ror.org/021tmn508","country_code":"NL","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210113007"]},{"id":"https://openalex.org/I4210137263","display_name":"AstraZeneca (Finland)","ror":"https://ror.org/035n56j11","country_code":"FI","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210137263"]},{"id":"https://openalex.org/I4210143795","display_name":"AstraZeneca (Sweden)","ror":"https://ror.org/04wwrrg31","country_code":"SE","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210143795"]}],"countries":["FI","NL","SE"],"is_corresponding":false,"raw_author_name":"Czechtizky, Werngard","raw_affiliation_strings":["Medicinal Chemistry, Research and Early Development, Respiratory and Immunology, BioPharmaceuticals R and D, AstraZeneca, Sweden"],"affiliations":[{"raw_affiliation_string":"Medicinal Chemistry, Research and Early Development, Respiratory and Immunology, BioPharmaceuticals R and D, AstraZeneca, Sweden","institution_ids":["https://openalex.org/I4210137263","https://openalex.org/I4210143795","https://openalex.org/I4210113007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031181295","display_name":"Leonardo De Maria","orcid":"https://orcid.org/0000-0002-8061-4242"},"institutions":[{"id":"https://openalex.org/I4210113007","display_name":"AstraZeneca (Netherlands)","ror":"https://ror.org/021tmn508","country_code":"NL","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210113007"]},{"id":"https://openalex.org/I4210137263","display_name":"AstraZeneca (Finland)","ror":"https://ror.org/035n56j11","country_code":"FI","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210137263"]},{"id":"https://openalex.org/I4210143795","display_name":"AstraZeneca (Sweden)","ror":"https://ror.org/04wwrrg31","country_code":"SE","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210143795"]}],"countries":["FI","NL","SE"],"is_corresponding":false,"raw_author_name":"De Maria, Leonardo","raw_affiliation_strings":["Medicinal Chemistry, Research and Early Development, Respiratory and Immunology, BioPharmaceuticals R and D, AstraZeneca, Sweden"],"affiliations":[{"raw_affiliation_string":"Medicinal Chemistry, Research and Early Development, Respiratory and Immunology, BioPharmaceuticals R and D, AstraZeneca, Sweden","institution_ids":["https://openalex.org/I4210137263","https://openalex.org/I4210143795","https://openalex.org/I4210113007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5125628289","display_name":"Michele Vendruscolo","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vendruscolo, Michele","raw_affiliation_strings":["Yusuf Hamied Department of Chemistry, University of Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Yusuf Hamied Department of Chemistry, University of Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5125670462"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.6080999970436096,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.6080999970436096,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.19930000603199005,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.05139999836683273,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5730999708175659},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.49639999866485596},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4823000133037567},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4489000141620636},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.42879998683929443},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4207000136375427},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4009000062942505},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.38260000944137573},{"id":"https://openalex.org/keywords/affinities","display_name":"Affinities","score":0.3650999963283539}],"concepts":[{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5730999708175659},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5561000108718872},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.49639999866485596},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4823000133037567},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4489000141620636},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.42879998683929443},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4009000062942505},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.38260000944137573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3822999894618988},{"id":"https://openalex.org/C2780283098","wikidata":"https://www.wikidata.org/wiki/Q4688960","display_name":"Affinities","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.36390000581741333},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.35019999742507935},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3391000032424927},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33559998869895935},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3345000147819519},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.3321000039577484},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C139489369","wikidata":"https://www.wikidata.org/wiki/Q770846","display_name":"Structural similarity","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C2777742833","wikidata":"https://www.wikidata.org/wiki/Q1964083","display_name":"Reciprocal","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28200000524520874},{"id":"https://openalex.org/C3018795828","wikidata":"https://www.wikidata.org/wiki/Q899107","display_name":"Binding affinities","level":3,"score":0.2777999937534332},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2727000117301941},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23583","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23583","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23583","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23583","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"accurate":[1],"prediction":[2,231],"of":[3,31,135,165],"protein-RNA":[4,138,233],"binding":[5,119],"affinity":[6,120,230],"remains":[7],"an":[8],"unsolved":[9],"problem":[10],"in":[11,16],"structural":[12,29,238],"biology,":[13],"limiting":[14],"opportunities":[15],"understanding":[17],"gene":[18],"regulation":[19],"and":[20,109,127,195],"designing":[21],"RNA-targeting":[22],"therapeutics.":[23],"A":[24],"central":[25],"obstacle":[26,60],"is":[27,211],"the":[28,79,170,200],"flexibility":[30],"RNA,":[32],"as,":[33],"unlike":[34],"proteins,":[35],"RNA":[36,110],"molecules":[37,111],"exist":[38],"as":[39,204],"dynamic":[40],"conformational":[41,87],"ensembles.":[42],"Thus,":[43],"committing":[44],"to":[45,53,117,192,207,229],"a":[46,73,97,113,132,150,162,167,220,227],"single":[47],"predicted":[48,92],"structure":[49,80],"discards":[50],"information":[51,89],"relevant":[52],"binding.":[54],"Here,":[55],"we":[56,129],"show":[57],"that":[58,100,182],"this":[59],"can":[61],"be":[62],"addressed":[63],"by":[64,173],"extracting":[65],"pre-structural":[66,102,217],"embeddings,":[67],"which":[68,236],"are":[69],"intermediate":[70],"representations":[71],"from":[72,104,122,145],"biomolecular":[74],"foundation":[75],"model":[76,99],"captured":[77],"before":[78],"decoding":[81],"step.":[82],"Pre-structural":[83],"embeddings":[84,103,218],"implicitly":[85],"encode":[86],"ensemble":[88],"without":[90],"requiring":[91],"structures.":[93],"We":[94],"build":[95],"ZeroFold,":[96],"transformer-based":[98],"combines":[101],"Boltz-2":[105],"for":[106,184,223,232,235],"both":[107],"protein":[108],"through":[112],"cross-modal":[114],"attention":[115],"mechanism":[116],"predict":[118],"directly":[121],"sequence.":[123],"To":[124],"support":[125],"training":[126,209],"evaluation,":[128],"construct":[130],"PRADB,":[131],"curated":[133],"dataset":[134],"2,621":[136],"unique":[137],"pairs":[139,234],"with":[140,155,190,199],"experimentally":[141],"measured":[142],"affinities":[143],"drawn":[144],"four":[146],"complementary":[147],"databases.":[148],"On":[149],"held-out":[151],"test":[152],"set":[153],"constructed":[154],"40%":[156],"sequence":[157,205],"identity":[158],"thresholds,":[159],"ZeroFold":[160,187],"achieves":[161],"Spearman":[163],"correlation":[164],"0.65,":[166],"value":[168],"approaching":[169],"ceiling":[171],"imposed":[172],"experimental":[174],"measurement":[175],"noise.":[176],"Under":[177],"progressively":[178],"fairer":[179],"evaluation":[180],"conditions":[181],"control":[183],"training-set":[185],"overlap,":[186],"compares":[188],"favourably":[189],"respect":[191],"leading":[193,196],"structure-based":[194],"sequence-based":[197],"predictors,":[198],"performance":[201],"gap":[202],"widening":[203],"similarity":[206],"competitor":[208],"data":[210,239],"reduced.":[212],"These":[213],"results":[214],"illustrate":[215],"how":[216],"offer":[219],"representation":[221],"strategy":[222],"flexible":[224],"biomolecules,":[225],"opening":[226],"route":[228],"no":[237],"exist.":[240]},"counts_by_year":[],"updated_date":"2026-03-27T06:05:27.210665","created_date":"2026-03-27T00:00:00"}
