{"id":"https://openalex.org/W3137581839","doi":"https://doi.org/10.1021/acs.jcim.0c01285","title":"MSA-Regularized Protein Sequence Transformer toward Predicting Genome-Wide Chemical-Protein Interactions: Application to GPCRome Deorphanization","display_name":"MSA-Regularized Protein Sequence Transformer toward Predicting Genome-Wide Chemical-Protein Interactions: Application to GPCRome Deorphanization","publication_year":2021,"publication_date":"2021-03-23","ids":{"openalex":"https://openalex.org/W3137581839","doi":"https://doi.org/10.1021/acs.jcim.0c01285","mag":"3137581839","pmid":"https://pubmed.ncbi.nlm.nih.gov/33757283"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.0c01285","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.0c01285","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.0c01285","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.0c01285","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043728103","display_name":"Tian Cai","orcid":"https://orcid.org/0000-0002-9516-6489"},"institutions":[{"id":"https://openalex.org/I121847817","display_name":"The Graduate Center, CUNY","ror":"https://ror.org/00awd9g61","country_code":"US","type":"education","lineage":["https://openalex.org/I121847817"]},{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tian Cai","raw_affiliation_strings":["Ph.D. Program in Computer Science, The Graduate Center, The City University of New York, New York, New York 10016, United States"],"affiliations":[{"raw_affiliation_string":"Ph.D. Program in Computer Science, The Graduate Center, The City University of New York, New York, New York 10016, United States","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050104020","display_name":"Hansaim Lim","orcid":"https://orcid.org/0000-0002-8420-4750"},"institutions":[{"id":"https://openalex.org/I121847817","display_name":"The Graduate Center, CUNY","ror":"https://ror.org/00awd9g61","country_code":"US","type":"education","lineage":["https://openalex.org/I121847817"]},{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hansaim Lim","raw_affiliation_strings":["Ph.D. Program in Biochemistry, The Graduate Center, The City University of New York, New York, New York 10016, United States"],"affiliations":[{"raw_affiliation_string":"Ph.D. Program in Biochemistry, The Graduate Center, The City University of New York, New York, New York 10016, United States","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009212722","display_name":"Kyra Alyssa Abbu","orcid":"https://orcid.org/0000-0001-9020-301X"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]},{"id":"https://openalex.org/I39694355","display_name":"Hunter College","ror":"https://ror.org/00g2xk477","country_code":"US","type":"education","lineage":["https://openalex.org/I39694355"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyra Alyssa Abbu","raw_affiliation_strings":["Department of Computer Science, Hunter College, The City University of New York, New York, New York 10065, United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Hunter College, The City University of New York, New York, New York 10065, United States","institution_ids":["https://openalex.org/I39694355","https://openalex.org/I174216632"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027520226","display_name":"Yue Qiu","orcid":"https://orcid.org/0000-0001-9692-1290"},"institutions":[{"id":"https://openalex.org/I121847817","display_name":"The Graduate Center, CUNY","ror":"https://ror.org/00awd9g61","country_code":"US","type":"education","lineage":["https://openalex.org/I121847817"]},{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yue Qiu","raw_affiliation_strings":["Ph.D. Program in Biology, The Graduate Center, The City University of New York, New York, New York 10016, United States"],"affiliations":[{"raw_affiliation_string":"Ph.D. Program in Biology, The Graduate Center, The City University of New York, New York, New York 10016, United States","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025357844","display_name":"Ruth Nussinov","orcid":null},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]},{"id":"https://openalex.org/I4210130649","display_name":"Frederick National Laboratory for Cancer Research","ror":"https://ror.org/03v6m3209","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I4210130649","https://openalex.org/I4210140884"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Ruth Nussinov","raw_affiliation_strings":["Computational Structural Biology Section, Basic Science Program, Frederick National Laboratory for Cancer Research, Frederick, Maryland 21702, United States","Department of Human Molecular Genetics and Biochemistry, Sackler School of Medicine, Tel Aviv University, Tel Aviv 69978, Israel"],"affiliations":[{"raw_affiliation_string":"Computational Structural Biology Section, Basic Science Program, Frederick National Laboratory for Cancer Research, Frederick, Maryland 21702, United States","institution_ids":["https://openalex.org/I4210130649"]},{"raw_affiliation_string":"Department of Human Molecular Genetics and Biochemistry, Sackler School of Medicine, Tel Aviv University, Tel Aviv 69978, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066245750","display_name":"Lei Xie","orcid":"https://orcid.org/0000-0001-9051-2111"},"institutions":[{"id":"https://openalex.org/I121847817","display_name":"The Graduate Center, CUNY","ror":"https://ror.org/00awd9g61","country_code":"US","type":"education","lineage":["https://openalex.org/I121847817"]},{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]},{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]},{"id":"https://openalex.org/I39694355","display_name":"Hunter College","ror":"https://ror.org/00g2xk477","country_code":"US","type":"education","lineage":["https://openalex.org/I39694355"]},{"id":"https://openalex.org/I4210097825","display_name":"MIND Research Institute","ror":"https://ror.org/00we1gw23","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210097825"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lei Xie","raw_affiliation_strings":["Department of Computer Science, Hunter College, The City University of New York, New York, New York 10065, United States","Helen and Robert Appel Alzheimer\u2019s Disease Research Institute, Feil Family Brain & Mind Research Institute, Weill Cornell Medicine, Cornell University, New York, New York 10021, United States","Ph.D. Program in Biochemistry, The Graduate Center, The City University of New York, New York, New York 10016, United States","Ph.D. Program in Biology, The Graduate Center, The City University of New York, New York, New York 10016, United States","Ph.D. Program in Computer Science, The Graduate Center, The City University of New York, New York, New York 10016, United States","Helen and Robert Appel Alzheimer's Disease Research Institute, Feil Family Brain & Mind Research Institute, Weill Cornell Medicine, Cornell University, New York, New York 10021, United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Hunter College, The City University of New York, New York, New York 10065, United States","institution_ids":["https://openalex.org/I39694355","https://openalex.org/I174216632"]},{"raw_affiliation_string":"Helen and Robert Appel Alzheimer\u2019s Disease Research Institute, Feil Family Brain & Mind Research Institute, Weill Cornell Medicine, Cornell University, New York, New York 10021, United States","institution_ids":["https://openalex.org/I4210097825","https://openalex.org/I205783295"]},{"raw_affiliation_string":"Ph.D. Program in Biochemistry, The Graduate Center, The City University of New York, New York, New York 10016, United States","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]},{"raw_affiliation_string":"Ph.D. Program in Biology, The Graduate Center, The City University of New York, New York, New York 10016, United States","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]},{"raw_affiliation_string":"Ph.D. Program in Computer Science, The Graduate Center, The City University of New York, New York, New York 10016, United States","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]},{"raw_affiliation_string":"Helen and Robert Appel Alzheimer's Disease Research Institute, Feil Family Brain & Mind Research Institute, Weill Cornell Medicine, Cornell University, New York, New York 10021, United States","institution_ids":["https://openalex.org/I4210097825","https://openalex.org/I205783295"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5066245750"],"corresponding_institution_ids":["https://openalex.org/I121847817","https://openalex.org/I174216632","https://openalex.org/I205783295","https://openalex.org/I39694355","https://openalex.org/I4210097825"],"apc_list":null,"apc_paid":null,"fwci":2.1909,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.88531132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":99},"biblio":{"volume":"61","issue":"4","first_page":"1570","last_page":"1582"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/protein-sequencing","display_name":"Protein sequencing","score":0.6240512132644653},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.609673261642456},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.6076192855834961},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5721803307533264},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5220289826393127},{"id":"https://openalex.org/keywords/protein-function-prediction","display_name":"Protein function prediction","score":0.5042988061904907},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4961867928504944},{"id":"https://openalex.org/keywords/sequence-learning","display_name":"Sequence learning","score":0.4733674228191376},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4153003692626953},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.3612881898880005},{"id":"https://openalex.org/keywords/peptide-sequence","display_name":"Peptide sequence","score":0.28146350383758545},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.27680355310440063},{"id":"https://openalex.org/keywords/protein-function","display_name":"Protein function","score":0.25505000352859497},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.19430464506149292}],"concepts":[{"id":"https://openalex.org/C10010492","wikidata":"https://www.wikidata.org/wiki/Q3142557","display_name":"Protein sequencing","level":4,"score":0.6240512132644653},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.609673261642456},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.6076192855834961},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5721803307533264},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5220289826393127},{"id":"https://openalex.org/C207060522","wikidata":"https://www.wikidata.org/wiki/Q7251473","display_name":"Protein function prediction","level":4,"score":0.5042988061904907},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4961867928504944},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.4733674228191376},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4153003692626953},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.3612881898880005},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.28146350383758545},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.27680355310440063},{"id":"https://openalex.org/C2986374874","wikidata":"https://www.wikidata.org/wiki/Q8054","display_name":"Protein function","level":3,"score":0.25505000352859497},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.19430464506149292}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1021/acs.jcim.0c01285","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.0c01285","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.0c01285","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:33757283","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33757283","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:8154251","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8154251","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Chem Inf Model","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1021/acs.jcim.0c01285","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.0c01285","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.0c01285","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5799999833106995,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2980691641","display_name":null,"funder_award_id":"R01AD057555","funder_id":"https://openalex.org/F4320337337","funder_display_name":"National Institute on Aging"},{"id":"https://openalex.org/G6392594831","display_name":null,"funder_award_id":"R01GM122845","funder_id":"https://openalex.org/F4320337354","funder_display_name":"National Institute of General Medical Sciences"}],"funders":[{"id":"https://openalex.org/F4320337337","display_name":"National Institute on Aging","ror":"https://ror.org/049v75w11"},{"id":"https://openalex.org/F4320337351","display_name":"National Cancer Institute","ror":"https://ror.org/040gcmg81"},{"id":"https://openalex.org/F4320337354","display_name":"National Institute of General Medical Sciences","ror":"https://ror.org/04q48ey07"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3137581839.pdf","grobid_xml":"https://content.openalex.org/works/W3137581839.grobid-xml"},"referenced_works_count":61,"referenced_works":["https://openalex.org/W1680392829","https://openalex.org/W1993677150","https://openalex.org/W1999068689","https://openalex.org/W2048718061","https://openalex.org/W2073654245","https://openalex.org/W2096541451","https://openalex.org/W2100257305","https://openalex.org/W2100301244","https://openalex.org/W2109254532","https://openalex.org/W2133564696","https://openalex.org/W2155063687","https://openalex.org/W2158714788","https://openalex.org/W2170146596","https://openalex.org/W2194775991","https://openalex.org/W2256040435","https://openalex.org/W2399976582","https://openalex.org/W2513727910","https://openalex.org/W2527836113","https://openalex.org/W2552024303","https://openalex.org/W2566823781","https://openalex.org/W2592742128","https://openalex.org/W2618851150","https://openalex.org/W2626778328","https://openalex.org/W2765153686","https://openalex.org/W2777807685","https://openalex.org/W2782874671","https://openalex.org/W2791708148","https://openalex.org/W2798812533","https://openalex.org/W2807792492","https://openalex.org/W2809216727","https://openalex.org/W2883251903","https://openalex.org/W2898402099","https://openalex.org/W2898580681","https://openalex.org/W2899788782","https://openalex.org/W2943495267","https://openalex.org/W2949888546","https://openalex.org/W2950020917","https://openalex.org/W2951433247","https://openalex.org/W2953799871","https://openalex.org/W2965373594","https://openalex.org/W2965589679","https://openalex.org/W2971227267","https://openalex.org/W2974168418","https://openalex.org/W2981400941","https://openalex.org/W2982255935","https://openalex.org/W2996428491","https://openalex.org/W3005769002","https://openalex.org/W3015490653","https://openalex.org/W3024761859","https://openalex.org/W3028589594","https://openalex.org/W3040739508","https://openalex.org/W3096561213","https://openalex.org/W3110608978","https://openalex.org/W3146944767","https://openalex.org/W3158236124","https://openalex.org/W3177500196","https://openalex.org/W4237903493","https://openalex.org/W6600424091","https://openalex.org/W6603222412","https://openalex.org/W6631918127","https://openalex.org/W6834628200"],"related_works":["https://openalex.org/W2883195674","https://openalex.org/W2072736607","https://openalex.org/W2159663778","https://openalex.org/W3080374445","https://openalex.org/W2964790801","https://openalex.org/W2095784700","https://openalex.org/W2936543792","https://openalex.org/W2981223346","https://openalex.org/W2056092504","https://openalex.org/W4236358448"],"abstract_inverted_index":{"Small":[0],"molecules":[1],"play":[2],"a":[3,39,60,86,122,168,198],"critical":[4],"role":[5],"in":[6,20],"modulating":[7],"biological":[8],"systems.":[9],"Knowledge":[10],"of":[11,29,38,42,116,156,176,188,208,220,225,264,274],"chemical-protein":[12,177],"interactions":[13],"helps":[14],"address":[15],"fundamental":[16],"and":[17,22,48,72,141,159,192,212,248,258],"practical":[18],"questions":[19],"biology":[21],"medicine.":[23],"However,":[24],"with":[25,74],"the":[26,33,131,154,163,173,180,186,194,206,218,251,271],"rapid":[27],"emergence":[28],"newly":[30],"sequenced":[31,276],"genes,":[32],"endogenous":[34],"or":[35,77],"surrogate":[36],"ligands":[37,58,240],"vast":[40],"number":[41],"proteins":[43,152],"remain":[44],"unknown.":[45],"Homology":[46],"modeling":[47],"machine":[49,189],"learning":[50,89,115,175,190],"are":[51],"two":[52],"major":[53],"methods":[54,196],"for":[55,130,172,269],"assigning":[56],"new":[57,87],"to":[59,91,95,147,217,238,241,249],"protein":[61,71,118,132,139],"but":[62],"mostly":[63],"fail":[64],"when":[65],"sequence":[66,126,133,144,210],"homology":[67],"between":[68,151],"an":[69,267],"unannotated":[70,98],"those":[73],"known":[75],"functions":[76],"structures":[78],"is":[79],"low.":[80],"In":[81,179],"this":[82],"study,":[83],"we":[84,120,166],"develop":[85,121],"deep":[88],"framework":[90],"predict":[92],"chemical":[93,272],"binding":[94],"evolutionary":[96,111],"divergent":[97],"proteins,":[99],"whose":[100],"ligand":[101,259],"cannot":[102],"be":[103],"reliably":[104],"predicted":[105],"by":[106,162,197,254],"existing":[107],"methods.":[108],"By":[109],"incorporating":[110],"information":[112],"into":[113],"self-supervised":[114],"unlabeled":[117],"sequences,":[119],"novel":[123],"method,":[124],"distilled":[125],"alignment":[127,145],"embedding":[128],"(DISAE),":[129],"representation.":[134],"DISAE":[135,164,183,226,237,265],"can":[136],"utilize":[137],"all":[138],"sequences":[140],"their":[142,157,256],"multiple":[143],"(MSA)":[146],"capture":[148],"functional":[149],"relationships":[150],"without":[153],"knowledge":[155],"structure":[158],"function.":[160],"Followed":[161],"pretraining,":[165],"devise":[167],"module-based":[169],"fine-tuning":[170],"strategy":[171],"supervised":[174],"interactions.":[178],"benchmark":[181],"studies,":[182],"significantly":[184],"improves":[185],"generalizability":[187],"models":[191],"outperforms":[193],"state-of-the-art":[195],"large":[199],"margin.":[200],"Comprehensive":[201],"ablation":[202],"studies":[203],"suggest":[204],"that":[205,228],"use":[207,236],"MSA,":[209],"distillation,":[211],"triplet":[213],"pretraining":[214],"critically":[215],"contributes":[216],"success":[219],"DISAE.":[221],"The":[222,261],"interpretability":[223],"analysis":[224],"suggests":[227],"it":[229],"learns":[230],"biologically":[231],"meaningful":[232],"information.":[233],"We":[234],"further":[235],"assign":[239],"human":[242,252],"orphan":[243],"G-protein":[244],"coupled":[245],"receptors":[246],"(GPCRs)":[247],"cluster":[250],"GPCRome":[253],"integrating":[255],"phylogenetic":[257],"relationships.":[260],"promising":[262],"results":[263],"open":[266],"avenue":[268],"exploring":[270],"landscape":[273],"entire":[275],"genomes.":[277]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
