{"id":"https://openalex.org/W7104905766","doi":"https://doi.org/10.1109/tai.2025.3631454","title":"A Comprehensive Review of Transformer-Based Language Models for Protein Sequence Analysis and Design","display_name":"A Comprehensive Review of Transformer-Based Language Models for Protein Sequence Analysis and Design","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W7104905766","doi":"https://doi.org/10.1109/tai.2025.3631454"},"language":null,"primary_location":{"id":"doi:10.1109/tai.2025.3631454","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2025.3631454","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Nimisha Ghosh","orcid":"https://orcid.org/0000-0002-0697-6368"},"institutions":[{"id":"https://openalex.org/I26604189","display_name":"Shiv Nadar University","ror":"https://ror.org/05aqahr97","country_code":"IN","type":"education","lineage":["https://openalex.org/I26604189"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Nimisha Ghosh","raw_affiliation_strings":["Department of Computer Science and Engineering, Shiv Nadar University, Chennai, India"],"raw_orcid":"https://orcid.org/0000-0002-0697-6368","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shiv Nadar University, Chennai, India","institution_ids":["https://openalex.org/I26604189"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Daniele Santoni","orcid":"https://orcid.org/0000-0003-1898-5285"},"institutions":[{"id":"https://openalex.org/I4210115688","display_name":"Institute for Systems Analysis","ror":"https://ror.org/02kmet255","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210115688"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Daniele Santoni","raw_affiliation_strings":["Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy"],"raw_orcid":"https://orcid.org/0000-0003-1898-5285","affiliations":[{"raw_affiliation_string":"Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy","institution_ids":["https://openalex.org/I4210115688"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Debaleena Nawn","orcid":"https://orcid.org/0000-0003-1152-8073"},"institutions":[{"id":"https://openalex.org/I3130244785","display_name":"Adamas University","ror":"https://ror.org/02tne2741","country_code":"IN","type":"education","lineage":["https://openalex.org/I3130244785"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Debaleena Nawn","raw_affiliation_strings":["Department of Computer Science and Engineering, Adamas University, Kolkata, India"],"raw_orcid":"https://orcid.org/0000-0003-1152-8073","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Adamas University, Kolkata, India","institution_ids":["https://openalex.org/I3130244785"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Eleonora Ottaviani","orcid":"https://orcid.org/0009-0008-6643-1694"},"institutions":[{"id":"https://openalex.org/I4210115688","display_name":"Institute for Systems Analysis","ror":"https://ror.org/02kmet255","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210115688"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Eleonora Ottaviani","raw_affiliation_strings":["Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy"],"raw_orcid":"https://orcid.org/0009-0008-6643-1694","affiliations":[{"raw_affiliation_string":"Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy","institution_ids":["https://openalex.org/I4210115688"]}]},{"author_position":"last","author":{"id":null,"display_name":"Giovanni Felici","orcid":"https://orcid.org/0000-0003-0544-5407"},"institutions":[{"id":"https://openalex.org/I4210115688","display_name":"Institute for Systems Analysis","ror":"https://ror.org/02kmet255","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210115688"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Giovanni Felici","raw_affiliation_strings":["Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy"],"raw_orcid":"https://orcid.org/0000-0003-0544-5407","affiliations":[{"raw_affiliation_string":"Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy","institution_ids":["https://openalex.org/I4210115688"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1519,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.81530423,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"7","issue":"5","first_page":"2478","last_page":"2495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.7434999942779541,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.7434999942779541,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.12229999899864197,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.061500001698732376,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5529999732971191},{"id":"https://openalex.org/keywords/strengths-and-weaknesses","display_name":"Strengths and weaknesses","score":0.46160000562667847},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.43529999256134033},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3497999906539917},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3441999852657318},{"id":"https://openalex.org/keywords/human-language","display_name":"Human language","score":0.30959999561309814}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6467999815940857},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5529999732971191},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4966000020503998},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.46160000562667847},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.43529999256134033},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39239999651908875},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3497999906539917},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3441999852657318},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3156999945640564},{"id":"https://openalex.org/C2993724205","wikidata":"https://www.wikidata.org/wiki/Q315","display_name":"Human language","level":2,"score":0.30959999561309814},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.2791000008583069},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2025.3631454","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2025.3631454","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8388280272483826,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W202072618","https://openalex.org/W1609464906","https://openalex.org/W1770535494","https://openalex.org/W1991256448","https://openalex.org/W2064675550","https://openalex.org/W2072814963","https://openalex.org/W2076074498","https://openalex.org/W2097413543","https://openalex.org/W2116197635","https://openalex.org/W2131774270","https://openalex.org/W2730472814","https://openalex.org/W2896457183","https://openalex.org/W2897344260","https://openalex.org/W2950635152","https://openalex.org/W2951433247","https://openalex.org/W2963250244","https://openalex.org/W2995514860","https://openalex.org/W2999044305","https://openalex.org/W3010387158","https://openalex.org/W3044778276","https://openalex.org/W3135130381","https://openalex.org/W3146944767","https://openalex.org/W3164453494","https://openalex.org/W3177500196","https://openalex.org/W3177828909","https://openalex.org/W3186179742","https://openalex.org/W3211795435","https://openalex.org/W4205773061","https://openalex.org/W4220831705","https://openalex.org/W4225264859","https://openalex.org/W4281768709","https://openalex.org/W4285294723","https://openalex.org/W4288066876","https://openalex.org/W4293519117","https://openalex.org/W4296777412","https://openalex.org/W4296780589","https://openalex.org/W4296907865","https://openalex.org/W4300861364","https://openalex.org/W4306385408","https://openalex.org/W4308340089","https://openalex.org/W4308589354","https://openalex.org/W4310154745","https://openalex.org/W4313452839","https://openalex.org/W4315641887","https://openalex.org/W4316589888","https://openalex.org/W4317212783","https://openalex.org/W4318328407","https://openalex.org/W4321484068","https://openalex.org/W4327550249","https://openalex.org/W4361228745","https://openalex.org/W4362664882","https://openalex.org/W4366083739","https://openalex.org/W4366590073","https://openalex.org/W4377142567","https://openalex.org/W4382501959","https://openalex.org/W4384498728","https://openalex.org/W4385245566","https://openalex.org/W4387959257","https://openalex.org/W4388024559","https://openalex.org/W4388092799","https://openalex.org/W4390245367","https://openalex.org/W4394619527","https://openalex.org/W4396721167","https://openalex.org/W4399567091","https://openalex.org/W4400984795","https://openalex.org/W4402278817","https://openalex.org/W4404447386","https://openalex.org/W4404531735","https://openalex.org/W4405228734","https://openalex.org/W4405628858","https://openalex.org/W4406440058","https://openalex.org/W4406852071","https://openalex.org/W4407247258","https://openalex.org/W4408177377","https://openalex.org/W4408569178","https://openalex.org/W4408745455","https://openalex.org/W7133227460"],"related_works":[],"abstract_inverted_index":{"The":[0,14],"impact":[1],"of":[2,16,59,76,84,96,138,141],"Transformer-based":[3,40],"language":[4],"models":[5,18,41],"has":[6,19],"been":[7],"unprecedented":[8],"in":[9,25,39,111,130,144],"Natural":[10],"Language":[11],"Processing":[12],"(NLP).":[13],"success":[15],"such":[17,63],"also":[20],"led":[21],"to":[22,62,88,100,105,133,148],"their":[23,150],"adoption":[24],"other":[26],"fields":[27],"including":[28],"bioinformatics.":[29],"Taking":[30],"this":[31,34,49,124,131,145],"into":[32],"account,":[33],"paper":[35],"discusses":[36],"recent":[37],"advances":[38],"for":[42,118],"protein":[43,73],"sequence":[44],"analysis":[45],"and":[46,54,71,82,94,114,147],"design.":[47],"In":[48],"review,":[50],"we":[51,108],"have":[52,134],"discussed":[53,98],"analysed":[55],"a":[56,102],"significant":[57],"number":[58],"works":[60,99],"pertaining":[61],"applications.":[64],"These":[65],"applications":[66],"encompass":[67],"gene":[68],"ontology,":[69],"functional":[70],"structural":[72],"identification,":[74],"generation":[75],"<italic":[77],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[78],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">de":[79],"novo</i>":[80],"proteins":[81],"binding":[83],"proteins.":[85],"We":[86,121],"attempt":[87],"shed":[89],"light":[90],"on":[91],"the":[92,97,139,142],"strength":[93],"weaknesses":[95],"provide":[101],"comprehensive":[103],"insight":[104],"readers.":[106],"Finally,":[107],"highlight":[109],"shortcomings":[110],"existing":[112],"research":[113],"explore":[115],"potential":[116],"avenues":[117],"future":[119,151],"developments.":[120],"believe":[122],"that":[123],"review":[125],"will":[126],"help":[127],"researchers":[128],"working":[129],"field":[132],"an":[135],"overall":[136],"idea":[137],"state":[140],"art":[143],"field,":[146],"orient":[149],"studies.":[152]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-12T00:00:00"}
