{"id":"https://openalex.org/W4316926589","doi":"https://doi.org/10.1093/bioinformatics/btad029","title":"CATHe: detection of remote homologues for CATH superfamilies using embeddings from protein language models","display_name":"CATHe: detection of remote homologues for CATH superfamilies using embeddings from protein language models","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4316926589","doi":"https://doi.org/10.1093/bioinformatics/btad029","pmid":"https://pubmed.ncbi.nlm.nih.gov/36648327"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/btad029","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btad029","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/39/1/btad029/48959111/btad029.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://academic.oup.com/bioinformatics/article-pdf/39/1/btad029/48959111/btad029.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112601740","display_name":"Vamsi Nallapareddy","orcid":null},"institutions":[{"id":"https://openalex.org/I4210157240","display_name":"Institute of Structural and Molecular Biology","ror":"https://ror.org/05wsetc54","country_code":"GB","type":"facility","lineage":["https://openalex.org/I124357947","https://openalex.org/I124357947","https://openalex.org/I4210157240","https://openalex.org/I45129253","https://openalex.org/I98259816"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vamsi Nallapareddy","raw_affiliation_strings":["Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK"],"raw_orcid":"https://orcid.org/0000-0003-4750-038X","affiliations":[{"raw_affiliation_string":"Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK","institution_ids":["https://openalex.org/I4210157240","https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038645988","display_name":"Nicola Bordin","orcid":"https://orcid.org/0000-0002-6568-9035"},"institutions":[{"id":"https://openalex.org/I4210157240","display_name":"Institute of Structural and Molecular Biology","ror":"https://ror.org/05wsetc54","country_code":"GB","type":"facility","lineage":["https://openalex.org/I124357947","https://openalex.org/I124357947","https://openalex.org/I4210157240","https://openalex.org/I45129253","https://openalex.org/I98259816"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Nicola Bordin","raw_affiliation_strings":["Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK"],"raw_orcid":"https://orcid.org/0000-0002-6568-9035","affiliations":[{"raw_affiliation_string":"Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK","institution_ids":["https://openalex.org/I4210157240","https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036351252","display_name":"Ian Sillitoe","orcid":"https://orcid.org/0000-0003-1091-9144"},"institutions":[{"id":"https://openalex.org/I4210157240","display_name":"Institute of Structural and Molecular Biology","ror":"https://ror.org/05wsetc54","country_code":"GB","type":"facility","lineage":["https://openalex.org/I124357947","https://openalex.org/I124357947","https://openalex.org/I4210157240","https://openalex.org/I45129253","https://openalex.org/I98259816"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ian Sillitoe","raw_affiliation_strings":["Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK"],"raw_orcid":"https://orcid.org/0000-0003-1091-9144","affiliations":[{"raw_affiliation_string":"Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK","institution_ids":["https://openalex.org/I4210157240","https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075726670","display_name":"Michael Heinzinger","orcid":"https://orcid.org/0000-0002-9601-3580"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Michael Heinzinger","raw_affiliation_strings":["Department of Informatics , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","Technical University of Munich (TUM) , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany"],"raw_orcid":"https://orcid.org/0000-0002-9601-3580","affiliations":[{"raw_affiliation_string":"Department of Informatics , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","institution_ids":["https://openalex.org/I62916508"]},{"raw_affiliation_string":"Technical University of Munich (TUM) , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025018140","display_name":"Maria Littmann","orcid":"https://orcid.org/0000-0001-8533-8163"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Maria Littmann","raw_affiliation_strings":["Department of Informatics , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","Technical University of Munich (TUM) , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany"],"raw_orcid":"https://orcid.org/0000-0001-8533-8163","affiliations":[{"raw_affiliation_string":"Department of Informatics , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","institution_ids":["https://openalex.org/I62916508"]},{"raw_affiliation_string":"Technical University of Munich (TUM) , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038514035","display_name":"Vaishali Waman","orcid":"https://orcid.org/0000-0001-5627-8827"},"institutions":[{"id":"https://openalex.org/I4210157240","display_name":"Institute of Structural and Molecular Biology","ror":"https://ror.org/05wsetc54","country_code":"GB","type":"facility","lineage":["https://openalex.org/I124357947","https://openalex.org/I124357947","https://openalex.org/I4210157240","https://openalex.org/I45129253","https://openalex.org/I98259816"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vaishali P Waman","raw_affiliation_strings":["Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK","institution_ids":["https://openalex.org/I4210157240","https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036599989","display_name":"Neeladri Sen","orcid":"https://orcid.org/0000-0002-3324-5755"},"institutions":[{"id":"https://openalex.org/I4210157240","display_name":"Institute of Structural and Molecular Biology","ror":"https://ror.org/05wsetc54","country_code":"GB","type":"facility","lineage":["https://openalex.org/I124357947","https://openalex.org/I124357947","https://openalex.org/I4210157240","https://openalex.org/I45129253","https://openalex.org/I98259816"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Neeladri Sen","raw_affiliation_strings":["Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK"],"raw_orcid":"https://orcid.org/0000-0002-3324-5755","affiliations":[{"raw_affiliation_string":"Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK","institution_ids":["https://openalex.org/I4210157240","https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064905883","display_name":"Burkhard Rost","orcid":"https://orcid.org/0000-0003-0179-8424"},"institutions":[{"id":"https://openalex.org/I309496635","display_name":"Weihenstephan-Triesdorf University of Applied Sciences","ror":"https://ror.org/00gzkxz88","country_code":"DE","type":"education","lineage":["https://openalex.org/I309496635"]},{"id":"https://openalex.org/I4210137766","display_name":"Institute for Advanced Study","ror":"https://ror.org/03xg85719","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210137766"]},{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Burkhard Rost","raw_affiliation_strings":["Department of Informatics , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","Institute for Advanced Study (TUM-IAS) , Garching/Munich 85748, Germany","TUM School of Life Sciences Weihenstephan (WZW) 85354, Germany","Technical University of Munich (TUM) , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Informatics , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","institution_ids":["https://openalex.org/I62916508"]},{"raw_affiliation_string":"Institute for Advanced Study (TUM-IAS) , Garching/Munich 85748, Germany","institution_ids":["https://openalex.org/I4210137766"]},{"raw_affiliation_string":"TUM School of Life Sciences Weihenstephan (WZW) 85354, Germany","institution_ids":["https://openalex.org/I309496635"]},{"raw_affiliation_string":"Technical University of Munich (TUM) , Bioinformatics and Computational Biology\u2014i12 , , Garching/Munich 85748, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081250379","display_name":"Christine Orengo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210157240","display_name":"Institute of Structural and Molecular Biology","ror":"https://ror.org/05wsetc54","country_code":"GB","type":"facility","lineage":["https://openalex.org/I124357947","https://openalex.org/I124357947","https://openalex.org/I4210157240","https://openalex.org/I45129253","https://openalex.org/I98259816"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Christine Orengo","raw_affiliation_strings":["Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Structural and Molecular Biology, University College London , London WC1E 6BT, UK","institution_ids":["https://openalex.org/I4210157240","https://openalex.org/I45129253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5081250379"],"corresponding_institution_ids":["https://openalex.org/I4210157240","https://openalex.org/I45129253"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":{"value":3618,"currency":"USD","value_usd":3618},"fwci":11.2804,"has_fulltext":true,"cited_by_count":43,"citation_normalized_percentile":{"value":0.98701508,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"39","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.6468999981880188,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.6468999981880188,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.17599999904632568,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.041600000113248825,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7370020151138306},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.584148645401001},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5490097403526306},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5386898517608643},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5098692178726196},{"id":"https://openalex.org/keywords/protein-sequencing","display_name":"Protein sequencing","score":0.4582279622554779},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45449864864349365},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4519094228744507},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4504181742668152},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4439803659915924},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4167941212654114},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4163840711116791},{"id":"https://openalex.org/keywords/structural-classification-of-proteins-database","display_name":"Structural Classification of Proteins database","score":0.4138652980327606},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.2796649932861328},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1714114546775818},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.16105550527572632},{"id":"https://openalex.org/keywords/peptide-sequence","display_name":"Peptide sequence","score":0.1500447690486908},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1237478256225586},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.08771860599517822},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.08552926778793335}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7370020151138306},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.584148645401001},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5490097403526306},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5386898517608643},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5098692178726196},{"id":"https://openalex.org/C10010492","wikidata":"https://www.wikidata.org/wiki/Q3142557","display_name":"Protein sequencing","level":4,"score":0.4582279622554779},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45449864864349365},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4519094228744507},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4504181742668152},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4439803659915924},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4167941212654114},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4163840711116791},{"id":"https://openalex.org/C58773245","wikidata":"https://www.wikidata.org/wiki/Q4832556","display_name":"Structural Classification of Proteins database","level":3,"score":0.4138652980327606},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.2796649932861328},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1714114546775818},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.16105550527572632},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.1500447690486908},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1237478256225586},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.08771860599517822},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.08552926778793335},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D017386","descriptor_name":"Sequence Homology, Amino Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017386","descriptor_name":"Sequence Homology, Amino Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017386","descriptor_name":"Sequence Homology, Amino Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1093/bioinformatics/btad029","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btad029","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/39/1/btad029/48959111/btad029.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:36648327","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36648327","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:9887088","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/9887088","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC9887088/pdf/btad029.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Bioinformatics","raw_type":"Text"},{"id":"pmh:oai:www.research.unipd.it:11577/3597644","is_oa":true,"landing_page_url":"https://hdl.handle.net/11577/3597644","pdf_url":null,"source":{"id":"https://openalex.org/S4306402547","display_name":"Padua Research Archive (University of Padova)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I138689650","host_organization_name":"University of Padua","host_organization_lineage":["https://openalex.org/I138689650"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1093/bioinformatics/btad029","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btad029","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/39/1/btad029/48959111/btad029.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1271904115","display_name":"Increasing the Coverage and Accuracy of CATH for Comparative Genomics and Variant Interpretation","funder_award_id":"BB/R014892/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"},{"id":"https://openalex.org/G1305978807","display_name":"Leveraging functional profiling datasets with machine learning to uncover proteins and cellular processes important for ageing","funder_award_id":"BB/R009597/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"},{"id":"https://openalex.org/G1674536948","display_name":"Unlocking the chemical potential of plants: Predicting function from DNA sequence for complex enzyme superfamilies","funder_award_id":"BB/V014722/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"},{"id":"https://openalex.org/G3068706553","display_name":null,"funder_award_id":"RO1320/4\u20131","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G3957071345","display_name":null,"funder_award_id":"BB/R009597/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"},{"id":"https://openalex.org/G5479798918","display_name":null,"funder_award_id":"BB/V014722/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"},{"id":"https://openalex.org/G5767990935","display_name":"3D-Gateway - Gateway to protein structure and function","funder_award_id":"BB/S020144/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"},{"id":"https://openalex.org/G8763755177","display_name":"Exploiting data driven computational approaches for understanding protein structure and function in InterPro and Pfam","funder_award_id":"BB/S020039/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320308269","display_name":"Alexander von Humboldt-Stiftung","ror":"https://ror.org/012kf4317"},{"id":"https://openalex.org/F4320320286","display_name":"University College London","ror":"https://ror.org/02jx3x895"},{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"},{"id":"https://openalex.org/F4320334629","display_name":"Biotechnology and Biological Sciences Research Council","ror":"https://ror.org/00cwqg982"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4316926589.pdf"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W1965582988","https://openalex.org/W1979132333","https://openalex.org/W1979147581","https://openalex.org/W2008708467","https://openalex.org/W2051210555","https://openalex.org/W2053247228","https://openalex.org/W2055043387","https://openalex.org/W2069458148","https://openalex.org/W2095705004","https://openalex.org/W2101234009","https://openalex.org/W2102652793","https://openalex.org/W2106868489","https://openalex.org/W2117486996","https://openalex.org/W2145358391","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2464717012","https://openalex.org/W2739999456","https://openalex.org/W2770307029","https://openalex.org/W2807818025","https://openalex.org/W2898210859","https://openalex.org/W2900359059","https://openalex.org/W2949342052","https://openalex.org/W2950954328","https://openalex.org/W2953008890","https://openalex.org/W2972411752","https://openalex.org/W2984894304","https://openalex.org/W3095583226","https://openalex.org/W3106745904","https://openalex.org/W3116433486","https://openalex.org/W3118936575","https://openalex.org/W3166142427","https://openalex.org/W3177500196","https://openalex.org/W3177828909","https://openalex.org/W3211795435","https://openalex.org/W4213112325","https://openalex.org/W4281291878","https://openalex.org/W4303981501","https://openalex.org/W4327550249","https://openalex.org/W6674330103","https://openalex.org/W6675354045","https://openalex.org/W6769989246"],"related_works":["https://openalex.org/W2589992599","https://openalex.org/W2046399516","https://openalex.org/W2164753093","https://openalex.org/W2020085652","https://openalex.org/W2821108685","https://openalex.org/W2041862730","https://openalex.org/W2318097870","https://openalex.org/W2963616829","https://openalex.org/W2042084565","https://openalex.org/W2150963760"],"abstract_inverted_index":{"MOTIVATION:":[0],"CATH":[1,102,135,169,202],"is":[2,218],"a":[3,24,58,73,117,139,177],"protein":[4,66,143],"domain":[5,86],"classification":[6,26],"resource":[7],"that":[8,145],"exploits":[9],"an":[10,105],"automated":[11],"workflow":[12],"of":[13,27,34,75,107,120,123,142,179,189],"structure":[14],"and":[15,29,99,111,222],"sequence":[16,62,82],"comparison":[17],"alongside":[18],"expert":[19],"manual":[20],"curation":[21],"to":[22,38,84,125,167,204],"construct":[23],"hierarchical":[25],"evolutionary":[28],"structural":[30],"relationships.":[31],"The":[32,53,92,212],"aim":[33],"this":[35,227],"study":[36,228],"was":[37,70],"develop":[39],"algorithms":[40],"for":[41,171,214],"detecting":[42],"remote":[43,76,128],"homologues":[44,77,129],"missed":[45,130],"by":[46,131,192],"state-of-the-art":[47],"hidden":[48],"Markov":[49],"model":[50],"(HMM)-based":[51],"approaches.":[52],"method":[54],"developed":[55,216,225],"(CATHe)":[56],"combines":[57],"neural":[59],"network":[60],"with":[61,198],"representations":[63],"obtained":[64],"from":[65,134,182,200],"language":[67],"models.":[68],"It":[69],"assessed":[71],"using":[72,155],"dataset":[74,140],"having":[78],"less":[79],"than":[80],"20%":[81],"identity":[83],"any":[85],"in":[87,148,152,226],"the":[88,121,190,201,215,223],"training":[89],"set.":[90],"RESULTS:":[91],"CATHe":[93,124,158],"models":[94,217],"trained":[95],"on":[96,220,232],"1773":[97],"largest":[98,101],"50":[100],"superfamilies":[103,203],"had":[104,146],"accuracy":[106],"85.6":[108],"\u00b1":[109,113],"0.4%":[110],"98.2":[112],"0.3%,":[114],"respectively.":[115],"As":[116],"further":[118],"test":[119],"power":[122],"detect":[126],"more":[127],"HMMs":[132],"derived":[133],"domains,":[136],"we":[137,164,185],"used":[138],"consisting":[141],"domains":[144,181],"annotations":[147,170],"Pfam,":[149],"but":[150],"not":[151],"CATH.":[153],"By":[154],"highly":[156],"reliable":[157],"predictions":[159,191],"(expected":[160],"error":[161],"rate":[162],"<0.5%),":[163],"were":[165,207],"able":[166],"provide":[168],"4.62":[172],"million":[173],"Pfam":[174],"domains.":[175],"For":[176],"subset":[178],"these":[180],"Homo":[183],"sapiens,":[184],"structurally":[186],"validated":[187],"90.86%":[188],"comparing":[193],"their":[194],"corresponding":[195],"AlphaFold2":[196],"structures":[197,199],"which":[205],"they":[206],"assigned.":[208],"AVAILABILITY":[209],"AND":[210],"IMPLEMENTATION:":[211],"code":[213],"available":[219,239],"https://github.com/vam-sin/CATHe,":[221],"datasets":[224],"can":[229],"be":[230],"accessed":[231],"https://zenodo.org/record/6327572.":[233],"SUPPLEMENTARY":[234],"INFORMATION:":[235],"Supplementary":[236],"data":[237],"are":[238],"at":[240],"Bioinformatics":[241],"online.":[242]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":4}],"updated_date":"2026-06-03T09:05:47.796612","created_date":"2025-10-10T00:00:00"}
