{"id":"https://openalex.org/W4396871204","doi":"https://doi.org/10.1038/s42256-024-00836-4","title":"Multi-purpose RNA language modelling with motif-aware pretraining and type-guided fine-tuning","display_name":"Multi-purpose RNA language modelling with motif-aware pretraining and type-guided fine-tuning","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4396871204","doi":"https://doi.org/10.1038/s42256-024-00836-4"},"language":"en","primary_location":{"id":"doi:10.1038/s42256-024-00836-4","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-024-00836-4","pdf_url":"https://www.nature.com/articles/s42256-024-00836-4.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.nature.com/articles/s42256-024-00836-4.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064441924","display_name":"Ning Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]},{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Ning Wang","raw_affiliation_strings":["Big Data Lab, Baidu Inc., Beijing, China","Department of Computer Science, City University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Big Data Lab, Baidu Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]},{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055104982","display_name":"\u5c06\u5c1a \u6e21\u8fba","orcid":"https://orcid.org/0000-0002-6337-9375"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiang Bian","raw_affiliation_strings":["Big Data Lab, Baidu Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Big Data Lab, Baidu Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100348348","display_name":"Yuchen Li","orcid":"https://orcid.org/0000-0002-3869-7881"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchen Li","raw_affiliation_strings":["Big Data Lab, Baidu Inc., Beijing, China","Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Big Data Lab, Baidu Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100712470","display_name":"Xuhong Li","orcid":"https://orcid.org/0000-0002-2582-8256"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuhong Li","raw_affiliation_strings":["Big Data Lab, Baidu Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Big Data Lab, Baidu Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056209905","display_name":"Shahid Mumtaz","orcid":"https://orcid.org/0000-0001-6364-6149"},"institutions":[{"id":"https://openalex.org/I119004910","display_name":"Silesian University of Technology","ror":"https://ror.org/02dyjk442","country_code":"PL","type":"education","lineage":["https://openalex.org/I119004910"]},{"id":"https://openalex.org/I52590639","display_name":"Nottingham Trent University","ror":"https://ror.org/04xyxjd90","country_code":"GB","type":"education","lineage":["https://openalex.org/I52590639"]}],"countries":["GB","PL"],"is_corresponding":false,"raw_author_name":"Shahid Mumtaz","raw_affiliation_strings":["Department of Applied Informatics, Silesian University of Technology, Gliwice, Poland","Department of Computer Science, Nottingham Trent University, Nottingham, UK"],"affiliations":[{"raw_affiliation_string":"Department of Applied Informatics, Silesian University of Technology, Gliwice, Poland","institution_ids":["https://openalex.org/I119004910"]},{"raw_affiliation_string":"Department of Computer Science, Nottingham Trent University, Nottingham, UK","institution_ids":["https://openalex.org/I52590639"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072308822","display_name":"Linghe Kong","orcid":"https://orcid.org/0000-0001-9266-3044"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linghe Kong","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081254155","display_name":"Haoyi Xiong","orcid":"https://orcid.org/0000-0002-5451-3253"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyi Xiong","raw_affiliation_strings":["Big Data Lab, Baidu Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Big Data Lab, Baidu Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5064441924"],"corresponding_institution_ids":["https://openalex.org/I168719708","https://openalex.org/I98301712"],"apc_list":{"value":9750,"currency":"EUR","value_usd":11690},"apc_paid":{"value":9750,"currency":"EUR","value_usd":11690},"fwci":24.4067,"has_fulltext":true,"cited_by_count":107,"citation_normalized_percentile":{"value":0.99803379,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"6","issue":"5","first_page":"548","last_page":"557"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10604","display_name":"RNA Research and Splicing","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11482","display_name":"RNA modifications and cancer","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/motif","display_name":"Motif (music)","score":0.6550331115722656},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5315290093421936},{"id":"https://openalex.org/keywords/rna","display_name":"RNA","score":0.4161664843559265},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.13716769218444824},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13053399324417114},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.0979871153831482},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.09499916434288025},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.055319130420684814}],"concepts":[{"id":"https://openalex.org/C32276052","wikidata":"https://www.wikidata.org/wiki/Q908349","display_name":"Motif (music)","level":2,"score":0.6550331115722656},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5315290093421936},{"id":"https://openalex.org/C67705224","wikidata":"https://www.wikidata.org/wiki/Q11053","display_name":"RNA","level":3,"score":0.4161664843559265},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.13716769218444824},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13053399324417114},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0979871153831482},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.09499916434288025},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.055319130420684814}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1038/s42256-024-00836-4","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-024-00836-4","pdf_url":"https://www.nature.com/articles/s42256-024-00836-4.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:irep.ntu.ac.uk:51433","is_oa":true,"landing_page_url":null,"pdf_url":"https://irep.ntu.ac.uk/id/eprint/51433/1/1894381_Mumtaz.pdf","source":{"id":"https://openalex.org/S4306400559","display_name":"Nottingham Trent University's Institutional Repository (Nottingham Trent Repository)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I52590639","host_organization_name":"Nottingham Trent University","host_organization_lineage":["https://openalex.org/I52590639"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:pure.atira.dk:publications/4879a9ff-68d8-4368-bdcd-356ce7919b27","is_oa":true,"landing_page_url":"https://hdl.handle.net/2031/4879a9ff-68d8-4368-bdcd-356ce7919b27","pdf_url":null,"source":{"id":"https://openalex.org/S7407055387","display_name":"CityU Scholars","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Wang, N, Bian, J, Li, Y, Li, X, Mumtaz, S, Kong, L & Xiong, H 2024, 'Multi-purpose RNA language modelling with motif-aware pretraining and type-guided fine-tuning', Nature Machine Intelligence, vol. 6, no. 5, pp. 548-557. https://doi.org/10.1038/s42256-024-00836-4","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1038/s42256-024-00836-4","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-024-00836-4","pdf_url":"https://www.nature.com/articles/s42256-024-00836-4.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6700000166893005}],"awards":[{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4263355572","display_name":null,"funder_award_id":"62141220","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8513333314","display_name":null,"funder_award_id":"2021ZD01","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8955107213","display_name":null,"funder_award_id":"Major","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4396871204.pdf"},"referenced_works_count":63,"referenced_works":["https://openalex.org/W1966857768","https://openalex.org/W1967102258","https://openalex.org/W2025763720","https://openalex.org/W2031352989","https://openalex.org/W2041344749","https://openalex.org/W2058276221","https://openalex.org/W2061657172","https://openalex.org/W2070136856","https://openalex.org/W2086561953","https://openalex.org/W2089766336","https://openalex.org/W2098571862","https://openalex.org/W2102017611","https://openalex.org/W2108230379","https://openalex.org/W2120456037","https://openalex.org/W2134629862","https://openalex.org/W2141157874","https://openalex.org/W2154085311","https://openalex.org/W2177784250","https://openalex.org/W2233603983","https://openalex.org/W2331530999","https://openalex.org/W2407848977","https://openalex.org/W2536860838","https://openalex.org/W2740619578","https://openalex.org/W2759571676","https://openalex.org/W2806518291","https://openalex.org/W2884170967","https://openalex.org/W2890017579","https://openalex.org/W2951298881","https://openalex.org/W2954102902","https://openalex.org/W2981852735","https://openalex.org/W2993894543","https://openalex.org/W2997200074","https://openalex.org/W3005769002","https://openalex.org/W3011411500","https://openalex.org/W3013450562","https://openalex.org/W3082320051","https://openalex.org/W3095979265","https://openalex.org/W3111061871","https://openalex.org/W3126773939","https://openalex.org/W3127238141","https://openalex.org/W3135697420","https://openalex.org/W3180037578","https://openalex.org/W3212533323","https://openalex.org/W3215060724","https://openalex.org/W4211254980","https://openalex.org/W4213095938","https://openalex.org/W4225763451","https://openalex.org/W4283362091","https://openalex.org/W4286669150","https://openalex.org/W4290546063","https://openalex.org/W4293194795","https://openalex.org/W4300861274","https://openalex.org/W4307079438","https://openalex.org/W4362700579","https://openalex.org/W4366986661","https://openalex.org/W4382246105","https://openalex.org/W4382490702","https://openalex.org/W4383506054","https://openalex.org/W4388539614","https://openalex.org/W6739901393","https://openalex.org/W6911256845","https://openalex.org/W6930188195","https://openalex.org/W6967137381"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W3170299350","https://openalex.org/W2368410102","https://openalex.org/W2368037387","https://openalex.org/W190186656","https://openalex.org/W2902352756","https://openalex.org/W2377079823","https://openalex.org/W2605676258","https://openalex.org/W2599962286"],"abstract_inverted_index":{"Abstract":[0],"Pretrained":[1],"language":[2,66],"models":[3],"have":[4],"shown":[5],"promise":[6],"in":[7,62,134,153,168,173,181],"analysing":[8],"nucleotide":[9],"sequences,":[10],"yet":[11,43],"a":[12,20,105,135,190],"versatile":[13],"model":[14,34],"excelling":[15],"across":[16,142],"diverse":[17],"tasks":[18,93,147],"with":[19,94,162,189],"single":[21],"pretrained":[22,33,192],"weight":[23],"set":[24],"remains":[25],"elusive.":[26],"Here":[27],"we":[28],"introduce":[29],"RNAErnie,":[30],"an":[31,116],"RNA-focused":[32],"built":[35],"upon":[36],"the":[37,100,122,126,149],"transformer":[38],"architecture,":[39],"employing":[40],"two":[41],"simple":[42],"effective":[44],"strategies.":[45],"First,":[46],"RNAErnie":[47,103,152],"enhances":[48],"pretraining":[49,101],"by":[50],"incorporating":[51],"RNA":[52,74,95,113,117],"motifs":[53],"as":[54,80],"biological":[55],"priors":[56],"and":[57,119,145,156,176,187],"introducing":[58],"motif-level":[59],"random":[60],"masking":[61],"addition":[63],"to":[64,85,91,125,130,164],"masked":[65],"modelling":[67],"at":[68],"base/subsequence":[69],"levels.":[70],"It":[71,159],"also":[72],"tokenizes":[73],"types":[75,114],"(for":[76],"example,":[77],"miRNA,":[78],"lnRNA)":[79],"stop":[81],"words,":[82],"appending":[83],"them":[84],"sequences":[86,96],"during":[87,99],"pretraining.":[88],"Second,":[89],"subject":[90],"out-of-distribution":[92],"not":[97],"seen":[98],"phase,":[102],"proposes":[104],"type-guided":[106],"fine-tuning":[107],"strategy":[108],"that":[109],"first":[110],"predicts":[111],"possible":[112],"using":[115],"sequence":[118,129],"then":[120],"appends":[121],"predicted":[123],"type":[124],"tail":[127],"of":[128,151],"refine":[131],"feature":[132],"embedding":[133],"post":[136],"hoc":[137],"way.":[138],"Our":[139],"extensive":[140],"evaluation":[141],"seven":[143],"datasets":[144],"five":[146],"demonstrates":[148],"superiority":[150],"both":[154],"supervised":[155],"unsupervised":[157],"learning.":[158],"surpasses":[160],"baselines":[161],"up":[163],"1.8%":[165],"higher":[166],"accuracy":[167,172],"classification,":[169],"2.2%":[170],"greater":[171],"interaction":[174],"prediction":[175],"3.3%":[177],"improved":[178],"F1":[179],"score":[180],"structure":[182],"prediction,":[183],"showcasing":[184],"its":[185],"robustness":[186],"adaptability":[188],"unified":[191],"foundation.":[193]},"counts_by_year":[{"year":2026,"cited_by_count":23},{"year":2025,"cited_by_count":69},{"year":2024,"cited_by_count":15}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
