{"id":"https://openalex.org/W4392095606","doi":"https://doi.org/10.1038/s42256-024-00791-0","title":"Codon language embeddings provide strong signals for use in protein engineering","display_name":"Codon language embeddings provide strong signals for use in protein engineering","publication_year":2024,"publication_date":"2024-02-23","ids":{"openalex":"https://openalex.org/W4392095606","doi":"https://doi.org/10.1038/s42256-024-00791-0"},"language":"en","primary_location":{"id":"doi:10.1038/s42256-024-00791-0","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-024-00791-0","pdf_url":"https://www.nature.com/articles/s42256-024-00791-0.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.nature.com/articles/s42256-024-00791-0.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091004232","display_name":"Carlos Outeiral","orcid":"https://orcid.org/0000-0003-1408-5554"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Carlos Outeiral","raw_affiliation_strings":["Department of Statistics, University of Oxford, Oxford, UK"],"affiliations":[{"raw_affiliation_string":"Department of Statistics, University of Oxford, Oxford, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015572211","display_name":"Charlotte M. Deane","orcid":"https://orcid.org/0000-0003-1388-2252"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210129725","display_name":"KM Biologics (Japan)","ror":"https://ror.org/03qq2mk98","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210129725"]}],"countries":["GB","JP"],"is_corresponding":false,"raw_author_name":"Charlotte M. Deane","raw_affiliation_strings":["Department of Statistics, University of Oxford, Oxford, UK","Division of Biologics, Exscientia, Ltd, Oxford, UK"],"affiliations":[{"raw_affiliation_string":"Department of Statistics, University of Oxford, Oxford, UK","institution_ids":["https://openalex.org/I40120149"]},{"raw_affiliation_string":"Division of Biologics, Exscientia, Ltd, Oxford, UK","institution_ids":["https://openalex.org/I4210129725"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5091004232"],"corresponding_institution_ids":["https://openalex.org/I40120149"],"apc_list":{"value":9750,"currency":"EUR","value_usd":11690},"apc_paid":{"value":9750,"currency":"EUR","value_usd":11690},"fwci":17.173,"has_fulltext":true,"cited_by_count":75,"citation_normalized_percentile":{"value":0.99607859,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"6","issue":"2","first_page":"170","last_page":"179"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4765132963657379},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.3454165756702423},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.24051859974861145}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4765132963657379},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3454165756702423},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.24051859974861145}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1038/s42256-024-00791-0","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-024-00791-0","pdf_url":"https://www.nature.com/articles/s42256-024-00791-0.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1038/s42256-024-00791-0","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-024-00791-0","pdf_url":"https://www.nature.com/articles/s42256-024-00791-0.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1361938442","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G1934935867","display_name":null,"funder_award_id":"Engineering and Physical Sciences R","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3079284767","display_name":"Maths Research Associates 2021 Oxford","funder_award_id":"EP/W522582/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G6649409132","display_name":"DTP 2020-2021 University of Oxford","funder_award_id":"EP/T517811/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G7155175004","display_name":null,"funder_award_id":"Doctoral Prize","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G7468431023","display_name":null,"funder_award_id":"EP/W522582/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4392095606.pdf"},"referenced_works_count":58,"referenced_works":["https://openalex.org/W1968654669","https://openalex.org/W1998448271","https://openalex.org/W2018590530","https://openalex.org/W2051349181","https://openalex.org/W2068628419","https://openalex.org/W2076048958","https://openalex.org/W2154232977","https://openalex.org/W2155964482","https://openalex.org/W2157613837","https://openalex.org/W2170747616","https://openalex.org/W2280346092","https://openalex.org/W2755432955","https://openalex.org/W2903493617","https://openalex.org/W2951433247","https://openalex.org/W2962843773","https://openalex.org/W3010879523","https://openalex.org/W3015921770","https://openalex.org/W3118936575","https://openalex.org/W3120153567","https://openalex.org/W3146944767","https://openalex.org/W3177500196","https://openalex.org/W3179485843","https://openalex.org/W3184369217","https://openalex.org/W3213545574","https://openalex.org/W3215751839","https://openalex.org/W3216325381","https://openalex.org/W4205192056","https://openalex.org/W4206950245","https://openalex.org/W4210460281","https://openalex.org/W4210997669","https://openalex.org/W4220991280","https://openalex.org/W4225264859","https://openalex.org/W4225438928","https://openalex.org/W4225868104","https://openalex.org/W4280542143","https://openalex.org/W4281291878","https://openalex.org/W4281623512","https://openalex.org/W4281648132","https://openalex.org/W4281665783","https://openalex.org/W4281755657","https://openalex.org/W4281993476","https://openalex.org/W4283390570","https://openalex.org/W4285491216","https://openalex.org/W4286005114","https://openalex.org/W4286669150","https://openalex.org/W4290546426","https://openalex.org/W4300861364","https://openalex.org/W4303981501","https://openalex.org/W4307223892","https://openalex.org/W4310780109","https://openalex.org/W4310783369","https://openalex.org/W4327550249","https://openalex.org/W6600421821","https://openalex.org/W6600424091","https://openalex.org/W6600511658","https://openalex.org/W6606545560","https://openalex.org/W6675354045","https://openalex.org/W6702248584"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"Abstract":[0],"Protein":[1],"representations":[2,65],"from":[3],"deep":[4],"language":[5,53,97,107],"models":[6,54,70],"have":[7],"yielded":[8],"state-of-the-art":[9,69],"performance":[10],"across":[11,71],"many":[12],"tasks":[13],"in":[14,122,148],"computational":[15],"protein":[16,85,106],"engineering.":[17],"In":[18,76],"recent":[19,29],"years,":[20],"progress":[21],"has":[22],"primarily":[23],"focused":[24],"on":[25,56,100],"parameter":[26],"count,":[27],"with":[28],"models\u2019":[30],"capacities":[31],"surpassing":[32],"the":[33,36,131,143],"size":[34],"of":[35,59,74,84,134,145],"very":[37],"datasets":[38],"they":[39],"were":[40],"trained":[41,55,99],"on.":[42],"Here":[43],"we":[44,93],"propose":[45],"an":[46,138],"alternative":[47],"direction.":[48],"We":[49],"show":[50,94],"that":[51,66,95,111],"large":[52],"codons,":[57],"instead":[58],"amino":[60],"acid":[61],"sequences,":[62],"provide":[63],"high-quality":[64],"outperform":[67],"comparable":[68],"a":[72,96],"variety":[73],"tasks.":[75],"some":[77,110],"tasks,":[78],"such":[79],"as":[80],"species":[81],"recognition,":[82],"prediction":[83],"and":[86,128],"transcript":[87],"abundance":[88],"or":[89],"melting":[90],"point":[91],"estimation,":[92],"model":[98,129],"codons":[101],"outperforms":[102],"every":[103],"other":[104],"published":[105],"model,":[108],"including":[109],"contain":[112],"over":[113],"50":[114],"times":[115],"more":[116],"parameters.":[117],"These":[118],"results":[119],"indicate":[120],"that,":[121],"addition":[123],"to":[124,141],"commonly":[125],"studied":[126],"scale":[127],"complexity,":[130],"information":[132],"content":[133],"biological":[135],"data":[136],"provides":[137],"orthogonal":[139],"direction":[140],"improve":[142],"power":[144],"machine":[146],"learning":[147],"biology.":[149]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":48},{"year":2024,"cited_by_count":21}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
