{"id":"https://openalex.org/W4416715651","doi":"https://doi.org/10.1186/s12859-025-06286-y","title":"Protein language models uncover carbohydrate-active enzyme function in metagenomics","display_name":"Protein language models uncover carbohydrate-active enzyme function in metagenomics","publication_year":2025,"publication_date":"2025-11-26","ids":{"openalex":"https://openalex.org/W4416715651","doi":"https://doi.org/10.1186/s12859-025-06286-y","pmid":"https://pubmed.ncbi.nlm.nih.gov/41299229"},"language":"en","primary_location":{"id":"doi:10.1186/s12859-025-06286-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06286-y","pdf_url":"https://link.springer.com/content/pdf/10.1186/s12859-025-06286-y.pdf","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1186/s12859-025-06286-y.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006128016","display_name":"Kumar Thurimella","orcid":"https://orcid.org/0000-0002-0819-4378"},"institutions":[{"id":"https://openalex.org/I107606265","display_name":"Broad Institute","ror":"https://ror.org/05a0ya142","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I107606265"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]},{"id":"https://openalex.org/I4210087915","display_name":"Massachusetts General Hospital","ror":"https://ror.org/002pd6e78","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210087915","https://openalex.org/I48633490"]},{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Kumar Thurimella","raw_affiliation_strings":["Broad Institute of MIT and Harvard, Cambridge, MA, USA","Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA","Department of Chemical Engineering and Biotechnology, University of Cambridge, Cambridge, UK","School of Medicine, University of Colorado Anschutz Medical Campus, Aurora, CO, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA","institution_ids":["https://openalex.org/I107606265"]},{"raw_affiliation_string":"Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA","institution_ids":["https://openalex.org/I136199984","https://openalex.org/I4210087915"]},{"raw_affiliation_string":"Department of Chemical Engineering and Biotechnology, University of Cambridge, Cambridge, UK","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"School of Medicine, University of Colorado Anschutz Medical Campus, Aurora, CO, USA","institution_ids":["https://openalex.org/I51713134"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057328653","display_name":"Ahmed M. Mohamed","orcid":"https://orcid.org/0000-0003-2390-2210"},"institutions":[{"id":"https://openalex.org/I107606265","display_name":"Broad Institute","ror":"https://ror.org/05a0ya142","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I107606265"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I4210087915","display_name":"Massachusetts General Hospital","ror":"https://ror.org/002pd6e78","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210087915","https://openalex.org/I48633490"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmed M. T. Mohamed","raw_affiliation_strings":["Broad Institute of MIT and Harvard, Cambridge, MA, USA","Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA","institution_ids":["https://openalex.org/I107606265"]},{"raw_affiliation_string":"Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA","institution_ids":["https://openalex.org/I136199984","https://openalex.org/I4210087915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087365202","display_name":"Chenhao Li","orcid":"https://orcid.org/0000-0003-1182-6804"},"institutions":[{"id":"https://openalex.org/I107606265","display_name":"Broad Institute","ror":"https://ror.org/05a0ya142","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I107606265"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I4210087915","display_name":"Massachusetts General Hospital","ror":"https://ror.org/002pd6e78","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210087915","https://openalex.org/I48633490"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chenhao Li","raw_affiliation_strings":["Broad Institute of MIT and Harvard, Cambridge, MA, USA","Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA","institution_ids":["https://openalex.org/I107606265"]},{"raw_affiliation_string":"Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA","institution_ids":["https://openalex.org/I136199984","https://openalex.org/I4210087915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049927688","display_name":"Tommi Vatanen","orcid":"https://orcid.org/0000-0003-0949-1291"},"institutions":[{"id":"https://openalex.org/I107606265","display_name":"Broad Institute","ror":"https://ror.org/05a0ya142","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I107606265"]},{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]},{"id":"https://openalex.org/I154130895","display_name":"University of Auckland","ror":"https://ror.org/03b94tp07","country_code":"NZ","type":"education","lineage":["https://openalex.org/I154130895"]}],"countries":["FI","NZ","US"],"is_corresponding":false,"raw_author_name":"Tommi Vatanen","raw_affiliation_strings":["Broad Institute of MIT and Harvard, Cambridge, MA, USA","Department of Microbiology, Faculty of Agriculture and Forestry, University of Helsinki, Helsinki, Finland","Institute of Biotechnology, Helsinki Institute of Life Science, University of Helsinki, Helsinki, Finland","Liggins Institute, University of Auckland, Auckland, New Zealand","Research Program for Clinical and Molecular Metabolism, Faculty of Medicine, University of Helsinki, Helsinki, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA","institution_ids":["https://openalex.org/I107606265"]},{"raw_affiliation_string":"Department of Microbiology, Faculty of Agriculture and Forestry, University of Helsinki, Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]},{"raw_affiliation_string":"Institute of Biotechnology, Helsinki Institute of Life Science, University of Helsinki, Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]},{"raw_affiliation_string":"Liggins Institute, University of Auckland, Auckland, New Zealand","institution_ids":["https://openalex.org/I154130895"]},{"raw_affiliation_string":"Research Program for Clinical and Molecular Metabolism, Faculty of Medicine, University of Helsinki, Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039033181","display_name":"D. B. Graham","orcid":"https://orcid.org/0000-0002-1046-746X"},"institutions":[{"id":"https://openalex.org/I107606265","display_name":"Broad Institute","ror":"https://ror.org/05a0ya142","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I107606265"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I4210087915","display_name":"Massachusetts General Hospital","ror":"https://ror.org/002pd6e78","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210087915","https://openalex.org/I48633490"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel B. Graham","raw_affiliation_strings":["Broad Institute of MIT and Harvard, Cambridge, MA, USA","Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA","institution_ids":["https://openalex.org/I107606265"]},{"raw_affiliation_string":"Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA","institution_ids":["https://openalex.org/I136199984","https://openalex.org/I4210087915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076048603","display_name":"R\u00f3is\u0131\u0301n M. Owens","orcid":"https://orcid.org/0000-0001-7856-2108"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"R\u00f3is\u00edn M. Owens","raw_affiliation_strings":["Department of Chemical Engineering and Biotechnology, University of Cambridge, Cambridge, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering and Biotechnology, University of Cambridge, Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048085886","display_name":"Sabina Leanti La Rosa","orcid":"https://orcid.org/0000-0003-3527-8101"},"institutions":[{"id":"https://openalex.org/I54108979","display_name":"Norwegian University of Life Sciences","ror":"https://ror.org/04a1mvv97","country_code":"NO","type":"education","lineage":["https://openalex.org/I54108979"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Sabina Leanti La Rosa","raw_affiliation_strings":["Faculty of Chemistry, Biotechnology and Food Science, Norwegian University of Life Sciences, \u00c5s, Norway"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Chemistry, Biotechnology and Food Science, Norwegian University of Life Sciences, \u00c5s, Norway","institution_ids":["https://openalex.org/I54108979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006524653","display_name":"Damian R. Plichta","orcid":"https://orcid.org/0000-0002-6555-2557"},"institutions":[{"id":"https://openalex.org/I107606265","display_name":"Broad Institute","ror":"https://ror.org/05a0ya142","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I107606265"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I4210087915","display_name":"Massachusetts General Hospital","ror":"https://ror.org/002pd6e78","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210087915","https://openalex.org/I48633490"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Damian R. Plichta","raw_affiliation_strings":["Broad Institute of MIT and Harvard, Cambridge, MA, USA. damian@broadinstitute.org","Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA. damian@broadinstitute.org","Broad Institute of MIT and Harvard, Cambridge, MA, USA","Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA. damian@broadinstitute.org","institution_ids":["https://openalex.org/I107606265"]},{"raw_affiliation_string":"Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA. damian@broadinstitute.org","institution_ids":["https://openalex.org/I4210087915"]},{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA","institution_ids":["https://openalex.org/I107606265"]},{"raw_affiliation_string":"Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA","institution_ids":["https://openalex.org/I136199984","https://openalex.org/I4210087915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086894545","display_name":"Sergio Bacallado","orcid":"https://orcid.org/0000-0002-7193-6450"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Sergio Bacallado","raw_affiliation_strings":["Department of Pure Mathematics and Mathematical Statistics, University of Cambridge, Cambridge, UK. sb2116@cam.ac.uk","Department of Pure Mathematics and Mathematical Statistics, University of Cambridge, Cambridge, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Pure Mathematics and Mathematical Statistics, University of Cambridge, Cambridge, UK. sb2116@cam.ac.uk","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"Department of Pure Mathematics and Mathematical Statistics, University of Cambridge, Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5106469500","display_name":"Ramnik J. Xavier","orcid":null},"institutions":[{"id":"https://openalex.org/I107606265","display_name":"Broad Institute","ror":"https://ror.org/05a0ya142","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I107606265"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I4210087915","display_name":"Massachusetts General Hospital","ror":"https://ror.org/002pd6e78","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210087915","https://openalex.org/I48633490"]},{"id":"https://openalex.org/I4210155419","display_name":"Center for Systems Biology","ror":"https://ror.org/05r3dyn47","country_code":"US","type":"facility","lineage":["https://openalex.org/I136199984","https://openalex.org/I4210087915","https://openalex.org/I4210155419","https://openalex.org/I48633490"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ramnik J. Xavier","raw_affiliation_strings":["Broad Institute of MIT and Harvard, Cambridge, MA, USA. xavier@molbio.mgh.harvard.edu","Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA. xavier@molbio.mgh.harvard.edu","Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA","Broad Institute of MIT and Harvard, Cambridge, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA. xavier@molbio.mgh.harvard.edu","institution_ids":["https://openalex.org/I107606265"]},{"raw_affiliation_string":"Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA. xavier@molbio.mgh.harvard.edu","institution_ids":["https://openalex.org/I4210155419","https://openalex.org/I4210087915"]},{"raw_affiliation_string":"Center for Computational and Integrative Biology and Department of Molecular Biology, Massachusetts General Hospital, Harvard Medical School, Boston, MA, USA","institution_ids":["https://openalex.org/I136199984","https://openalex.org/I4210087915"]},{"raw_affiliation_string":"Broad Institute of MIT and Harvard, Cambridge, MA, USA","institution_ids":["https://openalex.org/I107606265"]}]}],"institutions":[],"countries_distinct_count":5,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5086894545"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":4.4727,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.9496751,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"26","issue":"1","first_page":"285","last_page":"285"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.3043000102043152,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.3043000102043152,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.13809999823570251,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.12790000438690186,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.7793999910354614},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5770999789237976},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5723999738693237},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.4885999858379364},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.3799999952316284},{"id":"https://openalex.org/keywords/protein-function","display_name":"Protein function","score":0.365200012922287}],"concepts":[{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.7793999910354614},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.684499979019165},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5770999789237976},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5723999738693237},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5368000268936157},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.4885999858379364},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.4480000138282776},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.3799999952316284},{"id":"https://openalex.org/C2986374874","wikidata":"https://www.wikidata.org/wiki/Q8054","display_name":"Protein function","level":3,"score":0.365200012922287},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3650999963283539},{"id":"https://openalex.org/C126831891","wikidata":"https://www.wikidata.org/wiki/Q221673","display_name":"Host (biology)","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.31299999356269836},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3109000027179718},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.30169999599456787},{"id":"https://openalex.org/C46111723","wikidata":"https://www.wikidata.org/wiki/Q471857","display_name":"Proteomics","level":3,"score":0.2890999913215637},{"id":"https://openalex.org/C152662350","wikidata":"https://www.wikidata.org/wiki/Q815297","display_name":"Systems biology","level":2,"score":0.28299999237060547}],"mesh":[{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D004798","descriptor_name":"Enzymes","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D050260","descriptor_name":"Carbohydrate Metabolism","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D050260","descriptor_name":"Carbohydrate Metabolism","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D050260","descriptor_name":"Carbohydrate Metabolism","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D050260","descriptor_name":"Carbohydrate Metabolism","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D058977","descriptor_name":"Molecular Sequence Annotation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D058977","descriptor_name":"Molecular Sequence Annotation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D058977","descriptor_name":"Molecular Sequence Annotation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D058977","descriptor_name":"Molecular Sequence Annotation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1186/s12859-025-06286-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06286-y","pdf_url":"https://link.springer.com/content/pdf/10.1186/s12859-025-06286-y.pdf","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:41299229","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41299229","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:12deae80218349f6899c455fc6bfc17e","is_oa":true,"landing_page_url":"https://doaj.org/article/12deae80218349f6899c455fc6bfc17e","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 26, Iss 1, Pp 1-20 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s12859-025-06286-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06286-y","pdf_url":"https://link.springer.com/content/pdf/10.1186/s12859-025-06286-y.pdf","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320307891","display_name":"Rotary Foundation","ror":"https://ror.org/048b0n981"},{"id":"https://openalex.org/F4320323264","display_name":"Gates Cambridge Trust","ror":"https://ror.org/033sn5p83"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416715651.pdf","grobid_xml":"https://content.openalex.org/works/W4416715651.grobid-xml"},"referenced_works_count":61,"referenced_works":["https://openalex.org/W2011301426","https://openalex.org/W2036897871","https://openalex.org/W2055043387","https://openalex.org/W2060637207","https://openalex.org/W2085284704","https://openalex.org/W2092295455","https://openalex.org/W2095726164","https://openalex.org/W2108929776","https://openalex.org/W2113679889","https://openalex.org/W2122559203","https://openalex.org/W2125826054","https://openalex.org/W2135621733","https://openalex.org/W2143485490","https://openalex.org/W2145268834","https://openalex.org/W2158714788","https://openalex.org/W2166123725","https://openalex.org/W2170747616","https://openalex.org/W2173591891","https://openalex.org/W2288139400","https://openalex.org/W2766352633","https://openalex.org/W2790569358","https://openalex.org/W2803787045","https://openalex.org/W2804515387","https://openalex.org/W2903406988","https://openalex.org/W2910615673","https://openalex.org/W2916965173","https://openalex.org/W2947189704","https://openalex.org/W2950954328","https://openalex.org/W3003257820","https://openalex.org/W3021030593","https://openalex.org/W3025481076","https://openalex.org/W3040245690","https://openalex.org/W3042305844","https://openalex.org/W3134194768","https://openalex.org/W3143063265","https://openalex.org/W3146944767","https://openalex.org/W3157437194","https://openalex.org/W3164046276","https://openalex.org/W3164444785","https://openalex.org/W3166142427","https://openalex.org/W3177500196","https://openalex.org/W3177828909","https://openalex.org/W3211779138","https://openalex.org/W4205192056","https://openalex.org/W4213112325","https://openalex.org/W4221074414","https://openalex.org/W4281487561","https://openalex.org/W4281790889","https://openalex.org/W4282922306","https://openalex.org/W4293475204","https://openalex.org/W4300861364","https://openalex.org/W4310458106","https://openalex.org/W4312211449","https://openalex.org/W4318071656","https://openalex.org/W4321480048","https://openalex.org/W4327550249","https://openalex.org/W4362471278","https://openalex.org/W4367052125","https://openalex.org/W4375858802","https://openalex.org/W4386860638","https://openalex.org/W4390876999"],"related_works":[],"abstract_inverted_index":{"BACKGROUND:":[0],"The":[1],"functional":[2,176],"annotation":[3,23,54,177],"of":[4,18,66,139,211],"uncharacterized":[5],"microbial":[6,19],"enzymes":[7,38,116],"from":[8,101,124],"metagenomic":[9,98],"data":[10],"remains":[11],"a":[12,97,169,208],"significant":[13],"challenge,":[14],"limiting":[15],"our":[16,190],"understanding":[17,210],"metabolic":[20],"dynamics.":[21],"Traditional":[22],"methods":[24,88],"often":[25],"rely":[26],"on":[27,163],"sequence":[28,43],"homology,":[29],"which":[30],"can":[31,192],"fail":[32],"to":[33,56,83,96,150,202,207,215],"identify":[34],"remote":[35],"homologs":[36],"or":[37],"with":[39,126],"structural":[40],"rather":[41],"than":[42],"conservation.":[44],"To":[45],"address":[46],"this":[47],"gap,":[48],"we":[49],"developed":[50],"CAZyLingua,":[51],"the":[52,63,183],"first":[53],"tool":[55,171],"use":[57],"protein":[58],"language":[59],"models":[60],"(pLMs)":[61],"for":[62,179],"accurate":[64],"classification":[65],"carbohydrate-active":[67],"enzyme":[68,148],"(CAZyme)":[69],"families":[70],"and":[71,80,129,197,204,218],"subfamilies.":[72],"RESULTS:":[73],"CAZyLingua":[74,104,167],"demonstrated":[75],"high":[76],"performance,":[77],"maintaining":[78],"precision":[79],"recall":[81],"comparable":[82],"state-of-the-art":[84],"hidden":[85],"Markov":[86],"model-based":[87],"while":[89],"outperforming":[90],"purely":[91],"sequence-based":[92],"approaches.":[93],"When":[94],"applied":[95],"gene":[99],"catalog":[100],"mother/infant":[102],"pairs,":[103],"identified":[105],"over":[106],"27,000":[107],"putative":[108],"CAZymes":[109],"missed":[110],"by":[111,188],"other":[112],"tools,":[113],"including":[114],"horizontally-transferred":[115],"implicated":[117],"in":[118,143,153],"infant":[119],"microbiome":[120],"development.":[121],"In":[122],"datasets":[123],"patients":[125],"Crohn's":[127,154],"disease":[128,155],"IgG4-related":[130,144],"disease,":[131,205],"CAZyLinuga":[132],"uncovered":[133],"disease-associated":[134],"CAZymes,":[135],"highlighting":[136],"an":[137],"expansion":[138],"carbohydrate":[140],"esterases":[141],"(CEs)":[142],"disease.":[145],"A":[146],"CE17":[147],"predicted":[149],"be":[151],"overabundant":[152],"was":[156],"functionally":[157],"validated,":[158],"confirming":[159],"its":[160],"catalytic":[161],"activity":[162],"acetylated":[164],"manno-oligosaccharides.":[165],"CONCLUSIONS:":[166],"is":[168],"powerful":[170],"that":[172],"effectively":[173],"augments":[174],"existing":[175],"pipelines":[178],"CAZymes.":[180],"By":[181],"leveraging":[182],"deep":[184],"contextual":[185],"information":[186],"captured":[187],"pLMs,":[189],"method":[191],"uncover":[193],"novel":[194],"CAZyme":[195],"diversity":[196],"reveal":[198],"enzymatic":[199],"functions":[200],"relevant":[201],"health":[203,217],"contributing":[206],"further":[209],"biological":[212],"processes":[213],"related":[214],"host":[216],"nutrition.":[219]},"counts_by_year":[{"year":2026,"cited_by_count":3}],"updated_date":"2026-05-08T15:41:06.802602","created_date":"2025-11-27T00:00:00"}
