{"id":"https://openalex.org/W2034115630","doi":"https://doi.org/10.1371/journal.pcbi.1001047","title":"Detecting Remote Evolutionary Relationships among Proteins by Large-Scale Semantic Embedding","display_name":"Detecting Remote Evolutionary Relationships among Proteins by Large-Scale Semantic Embedding","publication_year":2011,"publication_date":"2011-01-27","ids":{"openalex":"https://openalex.org/W2034115630","doi":"https://doi.org/10.1371/journal.pcbi.1001047","mag":"2034115630","pmid":"https://pubmed.ncbi.nlm.nih.gov/21298082"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.1001047","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1001047","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1001047&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1001047&type=printable","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058301235","display_name":"Iain Melvin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Iain Melvin","raw_affiliation_strings":["NEC Laboratories America, Princeton, New Jersey, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NEC Laboratories America, Princeton, New Jersey, United States of America","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076635608","display_name":"Jason Weston","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Weston","raw_affiliation_strings":["Google, New York, New York, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, New York, New York, United States of America","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057375933","display_name":"William Stafford Noble","orcid":"https://orcid.org/0000-0001-7283-4715"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"William Stafford Noble","raw_affiliation_strings":["Department of Genome Sciences, University of Washington, Seattle, Washington, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Genome Sciences, University of Washington, Seattle, Washington, United States of America","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085231879","display_name":"Christina S. Leslie","orcid":"https://orcid.org/0000-0002-4571-5910"},"institutions":[{"id":"https://openalex.org/I1334819555","display_name":"Memorial Sloan Kettering Cancer Center","ror":"https://ror.org/02yrq0923","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1334819555"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Christina Leslie","raw_affiliation_strings":["Computational Biology Program, Memorial Sloan-Kettering Cancer Center, New York, New York, United States of America","Computational Biology Program, Memorial Sloan Kettering Cancer Center, New York, New York, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Program, Memorial Sloan-Kettering Cancer Center, New York, New York, United States of America","institution_ids":["https://openalex.org/I1334819555"]},{"raw_affiliation_string":"Computational Biology Program, Memorial Sloan Kettering Cancer Center, New York, New York, United States of America","institution_ids":["https://openalex.org/I1334819555"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5057375933","https://openalex.org/A5085231879"],"corresponding_institution_ids":["https://openalex.org/I1334819555","https://openalex.org/I201448701"],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":0.7981,"has_fulltext":true,"cited_by_count":30,"citation_normalized_percentile":{"value":0.70980204,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"7","issue":"1","first_page":"e1001047","last_page":"e1001047"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6656495928764343},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.6175621747970581},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5230620503425598},{"id":"https://openalex.org/keywords/protein-structure-database","display_name":"Protein structure database","score":0.49078068137168884},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4432760775089264},{"id":"https://openalex.org/keywords/structural-classification-of-proteins-database","display_name":"Structural Classification of Proteins database","score":0.44093117117881775},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.4302242696285248},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.42634376883506775},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3986700177192688},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3740638196468353},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.2522381544113159},{"id":"https://openalex.org/keywords/sequence-database","display_name":"Sequence database","score":0.24202659726142883},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.14511239528656006},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.09455147385597229}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6656495928764343},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.6175621747970581},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5230620503425598},{"id":"https://openalex.org/C136475424","wikidata":"https://www.wikidata.org/wiki/Q7251500","display_name":"Protein structure database","level":4,"score":0.49078068137168884},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4432760775089264},{"id":"https://openalex.org/C58773245","wikidata":"https://www.wikidata.org/wiki/Q4832556","display_name":"Structural Classification of Proteins database","level":3,"score":0.44093117117881775},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.4302242696285248},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.42634376883506775},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3986700177192688},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3740638196468353},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.2522381544113159},{"id":"https://openalex.org/C41584329","wikidata":"https://www.wikidata.org/wiki/Q175902","display_name":"Sequence database","level":3,"score":0.24202659726142883},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.14511239528656006},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.09455147385597229},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D005075","descriptor_name":"Biological Evolution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D005075","descriptor_name":"Biological Evolution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D005075","descriptor_name":"Biological Evolution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D005075","descriptor_name":"Biological Evolution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":7,"locations":[{"id":"doi:10.1371/journal.pcbi.1001047","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1001047","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1001047&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},{"id":"pmid:21298082","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/21298082","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS computational biology","raw_type":null},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.815.4867","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.815.4867","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/32/18/pcbi.1001047.PMC3029239.pdf","raw_type":"text"},{"id":"pmh:oai:RePEc:plo:pcbi00:1001047","is_oa":false,"landing_page_url":"https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1001047","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:2a277e17bdbd4dd5add8156cd8a2073e","is_oa":true,"landing_page_url":"https://doaj.org/article/2a277e17bdbd4dd5add8156cd8a2073e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Computational Biology, Vol 7, Iss 1, p e1001047 (2011)","raw_type":"article"},{"id":"pmh:oai:figshare.com:article/139227","is_oa":true,"landing_page_url":"https://figshare.com/articles/Detecting_Remote_Evolutionary_Relationships_among_Proteins_by_Large_Scale_Semantic_Embedding/139227","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"},{"id":"pmh:oai:pubmedcentral.nih.gov:3029239","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/3029239","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Comput Biol","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.1001047","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1001047","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1001047&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6499999761581421,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2034115630.pdf","grobid_xml":"https://content.openalex.org/works/W2034115630.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W1549656520","https://openalex.org/W1965956170","https://openalex.org/W1968697272","https://openalex.org/W2047221353","https://openalex.org/W2055043387","https://openalex.org/W2082667898","https://openalex.org/W2085277871","https://openalex.org/W2087064593","https://openalex.org/W2096748155","https://openalex.org/W2105381419","https://openalex.org/W2110065044","https://openalex.org/W2117130368","https://openalex.org/W2122369731","https://openalex.org/W2124158580","https://openalex.org/W2126016150","https://openalex.org/W2127338593","https://openalex.org/W2133075481","https://openalex.org/W2143331230","https://openalex.org/W2145358391","https://openalex.org/W2147667050","https://openalex.org/W2152688507","https://openalex.org/W2157316923","https://openalex.org/W2158714788","https://openalex.org/W2161056921","https://openalex.org/W2541900248","https://openalex.org/W2988119488","https://openalex.org/W3150945973","https://openalex.org/W4241676240","https://openalex.org/W6641828625","https://openalex.org/W6678068712","https://openalex.org/W6681369196","https://openalex.org/W6683441042"],"related_works":["https://openalex.org/W1968265719","https://openalex.org/W2183523499","https://openalex.org/W2155238244","https://openalex.org/W4235848672","https://openalex.org/W2008052738","https://openalex.org/W2071382179","https://openalex.org/W2150948439","https://openalex.org/W1971206962","https://openalex.org/W2167463089","https://openalex.org/W2919728387"],"abstract_inverted_index":{"Virtually":[0],"every":[1],"molecular":[2],"biologist":[3],"has":[4],"searched":[5],"a":[6,20,114,127,195,215],"protein":[7,62,80,124,196,224],"or":[8,149],"DNA":[9],"sequence":[10,24,38,170,225],"database":[11,39],"to":[12,19,166,214],"find":[13,160],"sequences":[14,125],"that":[15,161],"are":[16,134],"evolutionarily":[17],"related":[18,132],"given":[21,216],"query.":[22],"Pairwise":[23],"comparison":[25,68],"methods--i.e.,":[26],"measures":[27],"of":[28,46,49,56,79,99,104,123,142,194,223],"similarity":[29,148,197],"between":[30,61],"query":[31],"and":[32,41,92,139,174,212],"target":[33],"sequences--provide":[34],"the":[35,44,53,64,97,101,105,156,192,200,209,221],"engine":[36],"for":[37,176],"search":[40,91],"have":[42],"been":[43],"subject":[45],"30":[47],"years":[48],"computational":[50],"research.":[51],"For":[52],"difficult":[54],"problem":[55],"detecting":[57],"remote":[58,177],"evolutionary":[59],"relationships":[60],"sequences,":[63],"most":[65],"successful":[66],"pairwise":[67,169],"methods":[69,171],"involve":[70],"building":[71],"local":[72,213],"models":[73],"(e.g.,":[74],"profile":[75],"hidden":[76],"Markov":[77],"models)":[78],"sequences.":[81],"However,":[82],"recent":[83],"work":[84],"in":[85,136,191],"massive":[86],"data":[87,106],"domains":[88],"like":[89,172],"web":[90],"natural":[93],"language":[94],"processing":[95],"demonstrate":[96],"advantage":[98],"exploiting":[100],"global":[102,189,210],"structure":[103,190,222],"space.":[107,226],"Motivated":[108],"by":[109],"this":[110],"work,":[111],"we":[112],"present":[113],"large-scale":[115],"algorithm":[116],"called":[117],"ProtEmbed,":[118],"which":[119,187],"learns":[120],"an":[121],"embedding":[122,202],"into":[126,155],"low-dimensional":[128],"\"semantic":[129],"space.\"":[130],"Evolutionarily":[131],"proteins":[133],"embedded":[135],"close":[137],"proximity,":[138],"additional":[140],"pieces":[141],"evidence,":[143],"such":[144],"as":[145],"3D":[146],"structural":[147],"class":[150],"labels,":[151],"can":[152,204],"be":[153,205],"incorporated":[154],"learning":[157],"process.":[158],"We":[159],"ProtEmbed":[162,201],"achieves":[163],"superior":[164],"accuracy":[165],"widely":[167],"used":[168],"PSI-BLAST":[173],"HHSearch":[175],"homology":[178],"detection;":[179],"it":[180],"also":[181],"outperforms":[182],"our":[183],"previous":[184],"RankProp":[185],"algorithm,":[186],"incorporates":[188],"form":[193],"network.":[198],"Finally,":[199],"space":[203],"visualized,":[206],"both":[207],"at":[208],"level":[211],"query,":[217],"yielding":[218],"intuition":[219],"about":[220]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":3}],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2025-10-10T00:00:00"}
