{"id":"https://openalex.org/W7113899491","doi":"https://doi.org/10.1145/3765612.3767805","title":"Learning the native-like codons with a 5\u2019UTR and RNA secondary structure aided species-informed transformer model","display_name":"Learning the native-like codons with a 5\u2019UTR and RNA secondary structure aided species-informed transformer model","publication_year":2025,"publication_date":"2025-10-12","ids":{"openalex":"https://openalex.org/W7113899491","doi":"https://doi.org/10.1145/3765612.3767805"},"language":null,"primary_location":{"id":"doi:10.1145/3765612.3767805","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3765612.3767805","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3765612.3767805","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3765612.3767805","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Qiuyue Hu","orcid":"https://orcid.org/0009-0007-6688-205X"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qiuyue Hu","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0007-6688-205X","affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Rui Zhou","orcid":"https://orcid.org/0009-0001-9823-1928"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Zhou","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"],"raw_orcid":"https://orcid.org/0009-0001-9823-1928","affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhihao Wang","orcid":"https://orcid.org/0009-0005-7479-1444"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihao Wang","raw_affiliation_strings":["Shandong University, Jinan, Shandong, China"],"raw_orcid":"https://orcid.org/0009-0005-7479-1444","affiliations":[{"raw_affiliation_string":"Shandong University, Jinan, Shandong, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Liangzhen Zheng","orcid":"https://orcid.org/0000-0003-1179-2106"},"institutions":[{"id":"https://openalex.org/I4210102308","display_name":"Lifetech Scientific (China)","ror":"https://ror.org/00qw2t413","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210102308"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liangzhen Zheng","raw_affiliation_strings":["Shenzhen Zelixir Biotech Co. Ltd, Shenzhen, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0003-1179-2106","affiliations":[{"raw_affiliation_string":"Shenzhen Zelixir Biotech Co. Ltd, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210102308"]}]},{"author_position":"last","author":{"id":null,"display_name":"Yanjie Wei","orcid":"https://orcid.org/0000-0002-4791-7540"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanjie Wei","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0002-4791-7540","affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210145761"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210145761"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.57269056,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9684000015258789,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9684000015258789,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.006599999964237213,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.003599999938160181,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codon-usage-bias","display_name":"Codon usage bias","score":0.7107999920845032},{"id":"https://openalex.org/keywords/genetic-code","display_name":"Genetic code","score":0.4478999972343445},{"id":"https://openalex.org/keywords/translational-efficiency","display_name":"Translational efficiency","score":0.4169999957084656},{"id":"https://openalex.org/keywords/start-codon","display_name":"Start codon","score":0.4138999879360199},{"id":"https://openalex.org/keywords/shine-dalgarno-sequence","display_name":"Shine-Dalgarno sequence","score":0.36230000853538513},{"id":"https://openalex.org/keywords/coding-region","display_name":"Coding region","score":0.3407999873161316},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.3314000070095062},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.3240000009536743},{"id":"https://openalex.org/keywords/open-reading-frame","display_name":"Open reading frame","score":0.32359999418258667}],"concepts":[{"id":"https://openalex.org/C87253356","wikidata":"https://www.wikidata.org/wiki/Q1106472","display_name":"Codon usage bias","level":4,"score":0.7107999920845032},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.48890000581741333},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46779999136924744},{"id":"https://openalex.org/C10853874","wikidata":"https://www.wikidata.org/wiki/Q180618","display_name":"Genetic code","level":3,"score":0.4478999972343445},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.44449999928474426},{"id":"https://openalex.org/C2775992057","wikidata":"https://www.wikidata.org/wiki/Q16956568","display_name":"Translational efficiency","level":5,"score":0.4169999957084656},{"id":"https://openalex.org/C176990463","wikidata":"https://www.wikidata.org/wiki/Q1854186","display_name":"Start codon","level":4,"score":0.4138999879360199},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.38749998807907104},{"id":"https://openalex.org/C55334214","wikidata":"https://www.wikidata.org/wiki/Q416807","display_name":"Shine-Dalgarno sequence","level":5,"score":0.36230000853538513},{"id":"https://openalex.org/C91779695","wikidata":"https://www.wikidata.org/wiki/Q3780824","display_name":"Coding region","level":3,"score":0.3407999873161316},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.3240000009536743},{"id":"https://openalex.org/C47289529","wikidata":"https://www.wikidata.org/wiki/Q908550","display_name":"Open reading frame","level":4,"score":0.32359999418258667},{"id":"https://openalex.org/C126142528","wikidata":"https://www.wikidata.org/wiki/Q3771876","display_name":"Nucleic acid structure","level":4,"score":0.3203999996185303},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30709999799728394},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C62614982","wikidata":"https://www.wikidata.org/wiki/Q904984","display_name":"Protein secondary structure","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C138025448","wikidata":"https://www.wikidata.org/wiki/Q7662597","display_name":"Synonymous substitution","level":5,"score":0.2888999879360199},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.2879999876022339},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.28189998865127563},{"id":"https://openalex.org/C153957851","wikidata":"https://www.wikidata.org/wiki/Q201448","display_name":"Transfer RNA","level":4,"score":0.27869999408721924},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C20580545","wikidata":"https://www.wikidata.org/wiki/Q28354","display_name":"Stop codon","level":3,"score":0.26440000534057617},{"id":"https://openalex.org/C12464233","wikidata":"https://www.wikidata.org/wiki/Q937624","display_name":"Five prime untranslated region","level":5,"score":0.258899986743927},{"id":"https://openalex.org/C152662350","wikidata":"https://www.wikidata.org/wiki/Q815297","display_name":"Systems biology","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3765612.3767805","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3765612.3767805","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3765612.3767805","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3765612.3767805","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3765612.3767805","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3765612.3767805","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W7113899491.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Efficient":[0],"protein":[1],"expression":[2,37],"across":[3,86],"heterologous":[4],"hosts":[5],"remains":[6],"a":[7,40,44,54,99,178],"major":[8],"challenge":[9],"in":[10,18,29,98,194],"synthetic":[11],"biology,":[12],"largely":[13],"due":[14],"to":[15,80,129,152],"species-specific":[16],"differences":[17],"codon":[19,32,46,83,113,180],"usage":[20,84],"and":[21,66,75,94,109],"regulatory":[22],"sequence":[23,96],"context.":[24],"A":[25],"key":[26],"difficulty":[27],"lies":[28],"reconstructing":[30],"the":[31,35,126,184],"landscape":[33],"of":[34,112],"target":[36,195],"system":[38],"within":[39],"foreign":[41],"host":[42],"with":[43,71,145,190],"native-like":[45],"preference.":[47,114],"To":[48],"address":[49],"this,":[50],"we":[51],"present":[52],"TransCodon,":[53],"Transformer-based":[55],"deep":[56,168],"learning":[57],"model":[58,182],"that":[59,118,175],"leverages":[60],"both":[61,107],"5'":[62],"untranslated":[63],"regions":[64],"(5'UTRs)":[65],"coding":[67],"sequences":[68,150],"(CDS),":[69],"along":[70],"explicit":[72],"species":[73],"identifiers":[74],"RNA":[76],"secondary":[77],"structure":[78],"information,":[79],"learn":[81],"nuanced":[82],"patterns":[85],"diverse":[87],"organisms.":[88],"By":[89],"incorporating":[90],"multisource":[91],"genomic":[92],"data":[93],"modeling":[95,102],"dependencies":[97],"masked":[100],"language":[101,181],"paradigm,":[103],"TransCodon":[104,156,176],"effectively":[105],"captures":[106],"local":[108],"global":[110],"determinants":[111],"Our":[115],"experiments":[116],"demonstrate":[117],"integrating":[119],"species-level":[120],"information":[121],"during":[122],"training":[123],"significantly":[124],"improves":[125],"model's":[127],"ability":[128],"predict":[130],"optimal":[131],"synonymous":[132],"codons":[133,144,161],"when":[134],"considering":[135],"different":[136],"evaluation":[137],"metrics.":[138],"More":[139],"importantly":[140],"it":[141],"identifies":[142],"nativelike":[143,188],"less":[146],"divergence":[147],"from":[148],"natural":[149],"compared":[151],"other":[153,167],"methods.":[154,170],"Besides,":[155],"could":[157],"capture":[158],"more":[159],"low-frequency":[160],"which":[162],"are":[163],"often":[164],"omitted":[165],"by":[166],"learning-based":[169],"The":[171],"results":[172],"thus":[173],"indicate":[174],"as":[177],"robust":[179],"has":[183],"potential":[185],"for":[186],"generating":[187],"CDS":[189],"high":[191],"translational":[192],"efficiency":[193],"hosts.":[196]},"counts_by_year":[],"updated_date":"2026-05-22T06:13:13.366637","created_date":"2025-12-11T00:00:00"}
