{"id":"https://openalex.org/W4387346613","doi":"https://doi.org/10.1145/3584371.3612942","title":"A Comparative Analysis of Transformer-based Protein Language Models for Remote Homology Prediction","display_name":"A Comparative Analysis of Transformer-based Protein Language Models for Remote Homology Prediction","publication_year":2023,"publication_date":"2023-09-03","ids":{"openalex":"https://openalex.org/W4387346613","doi":"https://doi.org/10.1145/3584371.3612942"},"language":"en","primary_location":{"id":"doi:10.1145/3584371.3612942","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3584371.3612942","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3584371.3612942","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3584371.3612942","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038994395","display_name":"Anowarul Kabir","orcid":"https://orcid.org/0000-0001-8060-2084"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Anowarul Kabir","raw_affiliation_strings":["Computer Science, George Mason University, Fairfax, Virginia, United States"],"raw_orcid":"https://orcid.org/0000-0001-8060-2084","affiliations":[{"raw_affiliation_string":"Computer Science, George Mason University, Fairfax, Virginia, United States","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089526864","display_name":"Asher Moldwin","orcid":"https://orcid.org/0000-0002-9215-5653"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Asher Moldwin","raw_affiliation_strings":["Computer Science, George Mason University, Fairfax, Virginia, United States"],"raw_orcid":"https://orcid.org/0000-0002-9215-5653","affiliations":[{"raw_affiliation_string":"Computer Science, George Mason University, Fairfax, Virginia, United States","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044722808","display_name":"Amarda Shehu","orcid":"https://orcid.org/0000-0001-5230-4610"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amarda Shehu","raw_affiliation_strings":["Computer Science, George Mason University, Fairfax, Virginia, United States"],"raw_orcid":"https://orcid.org/0000-0001-5230-4610","affiliations":[{"raw_affiliation_string":"Computer Science, George Mason University, Fairfax, Virginia, United States","institution_ids":["https://openalex.org/I162714631"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5038994395"],"corresponding_institution_ids":["https://openalex.org/I162714631"],"apc_list":null,"apc_paid":null,"fwci":0.2999,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.64019932,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6222116351127625},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6073057055473328},{"id":"https://openalex.org/keywords/homology","display_name":"Homology (biology)","score":0.5150251388549805},{"id":"https://openalex.org/keywords/protein-structure-prediction","display_name":"Protein structure prediction","score":0.49235761165618896},{"id":"https://openalex.org/keywords/homology-modeling","display_name":"Homology modeling","score":0.4324547052383423},{"id":"https://openalex.org/keywords/protein-design","display_name":"Protein design","score":0.4258269667625427},{"id":"https://openalex.org/keywords/protein-family","display_name":"Protein family","score":0.4121738076210022},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40393924713134766},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.39768415689468384},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3911442756652832},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.3415451645851135},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.21369829773902893},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.15226805210113525},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.11169683933258057}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6222116351127625},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6073057055473328},{"id":"https://openalex.org/C165525559","wikidata":"https://www.wikidata.org/wiki/Q224180","display_name":"Homology (biology)","level":3,"score":0.5150251388549805},{"id":"https://openalex.org/C18051474","wikidata":"https://www.wikidata.org/wiki/Q899656","display_name":"Protein structure prediction","level":3,"score":0.49235761165618896},{"id":"https://openalex.org/C169627665","wikidata":"https://www.wikidata.org/wiki/Q5767913","display_name":"Homology modeling","level":3,"score":0.4324547052383423},{"id":"https://openalex.org/C152769699","wikidata":"https://www.wikidata.org/wiki/Q410814","display_name":"Protein design","level":3,"score":0.4258269667625427},{"id":"https://openalex.org/C171897839","wikidata":"https://www.wikidata.org/wiki/Q417841","display_name":"Protein family","level":3,"score":0.4121738076210022},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40393924713134766},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.39768415689468384},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3911442756652832},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.3415451645851135},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.21369829773902893},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15226805210113525},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.11169683933258057},{"id":"https://openalex.org/C181199279","wikidata":"https://www.wikidata.org/wiki/Q8047","display_name":"Enzyme","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3584371.3612942","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3584371.3612942","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3584371.3612942","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3584371.3612942","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3584371.3612942","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3584371.3612942","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309029","display_name":"George Mason University","ror":"https://ror.org/02jqj7156"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387346613.pdf","grobid_xml":"https://content.openalex.org/works/W4387346613.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1976526581","https://openalex.org/W2097632784","https://openalex.org/W2101220662","https://openalex.org/W2114850508","https://openalex.org/W2161072217","https://openalex.org/W2328176404","https://openalex.org/W2550969987","https://openalex.org/W2964161653","https://openalex.org/W2984761660","https://openalex.org/W2995514860","https://openalex.org/W2996412683","https://openalex.org/W2999481648","https://openalex.org/W3040739508","https://openalex.org/W3101509328","https://openalex.org/W3118936575","https://openalex.org/W3146944767","https://openalex.org/W3166142427","https://openalex.org/W3177500196","https://openalex.org/W3215918380","https://openalex.org/W4210494137","https://openalex.org/W4242765109","https://openalex.org/W4286499749"],"related_works":["https://openalex.org/W2253761889","https://openalex.org/W3204091482","https://openalex.org/W2081601977","https://openalex.org/W2368468053","https://openalex.org/W4387310732","https://openalex.org/W2119378770","https://openalex.org/W1564749278","https://openalex.org/W2968494487","https://openalex.org/W3216044760","https://openalex.org/W2595628180"],"abstract_inverted_index":{"Protein":[0],"language":[1],"models":[2,62,119],"based":[3],"on":[4,21,77],"the":[5,75,100,124],"transformer":[6],"architecture":[7],"are":[8,120],"increasingly":[9],"shown":[10],"to":[11,74],"learn":[12,65],"rich":[13],"representations":[14],"from":[15],"protein":[16,26,47],"sequences":[17],"that":[18,60,115],"improve":[19],"performance":[20],"a":[22,32,78,89],"variety":[23],"of":[24,35,39,102,127],"downstream":[25],"prediction":[27,38,105],"tasks.":[28],"These":[29],"tasks":[30],"encompass":[31],"wide":[33],"range":[34],"predictions,":[36],"including":[37],"secondary":[40],"structure,":[41],"subcellular":[42],"localization,":[43],"evolutionary":[44],"relationships":[45],"within":[46],"families,":[48],"as":[49,51],"well":[50],"superfamily":[52],"and":[53],"family":[54],"membership.":[55],"There":[56],"is":[57],"recent":[58],"evidence":[59],"such":[61],"also":[63],"implicitly":[64],"structural":[66],"information.":[67],"In":[68],"this":[69,73],"paper":[70],"we":[71,97,113],"put":[72],"test":[76],"hallmark":[79],"problem":[80,101],"in":[81,123],"computational":[82],"biology,":[83],"remote":[84,103],"homology":[85,104],"prediction.":[86],"We":[87],"employ":[88],"rigorous":[90],"setting,":[91],"where,":[92],"by":[93],"lowering":[94],"sequence":[95,130],"identity,":[96],"clarify":[98],"whether":[99],"has":[106],"been":[107],"solved.":[108],"Among":[109],"various":[110],"interesting":[111],"findings,":[112],"report":[114],"current":[116],"state-of-the-art,":[117],"large":[118],"still":[121],"underperforming":[122],"\"twilight":[125],"zone\"":[126],"very":[128],"low":[129],"identity.":[131]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
