{"id":"https://openalex.org/W4388183199","doi":"https://doi.org/10.48550/arxiv.2310.19915","title":"GPCR-BERT: Interpreting Sequential Design of G Protein Coupled Receptors Using Protein Language Models","display_name":"GPCR-BERT: Interpreting Sequential Design of G Protein Coupled Receptors Using Protein Language Models","publication_year":2023,"publication_date":"2023-10-30","ids":{"openalex":"https://openalex.org/W4388183199","doi":"https://doi.org/10.48550/arxiv.2310.19915"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2310.19915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.19915","pdf_url":"https://arxiv.org/pdf/2310.19915","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2310.19915","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014201244","display_name":"Seong-Won Kim","orcid":"https://orcid.org/0000-0003-2822-7365"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kim, Seongwon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047372785","display_name":"Parisa Mollaei","orcid":"https://orcid.org/0000-0002-4711-9012"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mollaei, Parisa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111016398","display_name":"Akshay Antony","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Antony, Akshay","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011452751","display_name":"Rishikesh Magar","orcid":"https://orcid.org/0000-0001-6216-0518"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Magar, Rishikesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5008745801","display_name":"Amir Barati Farimani","orcid":"https://orcid.org/0000-0002-2952-8576"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Farimani, Amir Barati","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5014201244"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11178","display_name":"Receptor Mechanisms and Signaling","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10911","display_name":"Chemical Synthesis and Analysis","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/g-protein-coupled-receptor","display_name":"G protein-coupled receptor","score":0.7950150966644287},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.6289539337158203},{"id":"https://openalex.org/keywords/receptor","display_name":"Receptor","score":0.45104148983955383},{"id":"https://openalex.org/keywords/motif","display_name":"Motif (music)","score":0.44615912437438965},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4325698912143707},{"id":"https://openalex.org/keywords/protein-design","display_name":"Protein design","score":0.431509792804718},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3556317687034607},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.3282795548439026},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.310483455657959},{"id":"https://openalex.org/keywords/biochemistry","display_name":"Biochemistry","score":0.23129665851593018},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09111741185188293}],"concepts":[{"id":"https://openalex.org/C135285700","wikidata":"https://www.wikidata.org/wiki/Q38173","display_name":"G protein-coupled receptor","level":3,"score":0.7950150966644287},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.6289539337158203},{"id":"https://openalex.org/C170493617","wikidata":"https://www.wikidata.org/wiki/Q208467","display_name":"Receptor","level":2,"score":0.45104148983955383},{"id":"https://openalex.org/C32276052","wikidata":"https://www.wikidata.org/wiki/Q908349","display_name":"Motif (music)","level":2,"score":0.44615912437438965},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4325698912143707},{"id":"https://openalex.org/C152769699","wikidata":"https://www.wikidata.org/wiki/Q410814","display_name":"Protein design","level":3,"score":0.431509792804718},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3556317687034607},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.3282795548439026},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.310483455657959},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.23129665851593018},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09111741185188293},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2310.19915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.19915","pdf_url":"https://arxiv.org/pdf/2310.19915","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"pmh:oai:figshare.com:article/25201255","is_oa":true,"landing_page_url":"https://figshare.com/articles/journal_contribution/GPCR-BERT_Interpreting_Sequential_Design_of_G_Protein-Coupled_Receptors_Using_Protein_Language_Models/25201255","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"doi:10.48550/arxiv.2310.19915","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2310.19915","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2310.19915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.19915","pdf_url":"https://arxiv.org/pdf/2310.19915","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6499999761581421}],"awards":[],"funders":[{"id":"https://openalex.org/F4320310207","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33"},{"id":"https://openalex.org/F4320337968","display_name":"Center for Machine Learning and Health, School of Computer Science, Carnegie Mellon University","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1988730649","https://openalex.org/W2170120749","https://openalex.org/W2067576503","https://openalex.org/W2199134476","https://openalex.org/W2170285690","https://openalex.org/W4372348062","https://openalex.org/W4390587557","https://openalex.org/W3216044760","https://openalex.org/W2968494487","https://openalex.org/W3180927879"],"abstract_inverted_index":{"With":[0],"the":[1,17,27,48,54,63,68,78,95,113,126,140,146,162,169,178,194,198,209,213,216],"rise":[2],"of":[3,21,41,50,71,80,83,91,123,145,155,161,171,173,180,200,215],"Transformers":[4],"and":[5,12,19,37,103,118,143,158],"Large":[6],"Language":[7],"Models":[8],"(LLMs)":[9],"in":[10,44,125,139,176,189],"Chemistry":[11],"Biology,":[13],"new":[14],"avenues":[15],"for":[16,66],"design":[18,70],"understanding":[20,67,93],"therapeutics":[22],"have":[23],"opened":[24],"up":[25],"to":[26,53,131,167,207],"scientific":[28],"community.":[29],"Protein":[30],"sequences":[31],"can":[32,38],"be":[33],"modeled":[34],"as":[35,107],"language":[36],"take":[39],"advantage":[40,154],"recent":[42],"advances":[43],"LLMs,":[45],"specifically":[46],"with":[47,120],"abundance":[49],"our":[51],"access":[52],"protein":[55,115],"sequence":[56],"datasets.":[57],"In":[58,196],"this":[59],"paper,":[60],"we":[61,128,152],"developed":[62],"GPCR-BERT":[64],"model":[65,116,163],"sequential":[69],"G":[72],"Protein-Coupled":[73],"Receptors":[74],"(GPCRs).":[75],"GPCRs":[76],"are":[77,165],"target":[79],"over":[81,204],"one-third":[82],"FDA-approved":[84],"pharmaceuticals.":[85],"However,":[86],"there":[87],"is":[88],"a":[89],"lack":[90],"comprehensive":[92],"regarding":[94],"relationship":[96],"between":[97,137],"amino":[98,174],"acid":[99],"sequence,":[100],"ligand":[101],"selectivity,":[102],"conformational":[104],"motifs":[105],"(such":[106],"NPxxY,":[108],"CWxP,":[109],"E/DRY).":[110],"By":[111],"utilizing":[112],"pre-trained":[114],"(Prot-Bert)":[117],"fine-tuning":[119],"prediction":[121],"tasks":[122],"variations":[124],"motifs,":[127],"were":[129],"able":[130],"shed":[132],"light":[133],"on":[134],"several":[135],"relationships":[136],"residues":[138,192],"binding":[141],"pocket":[142],"some":[144],"conserved":[147],"motifs.":[148,195],"To":[149],"achieve":[150],"this,":[151],"took":[153],"attention":[156],"weights,":[157],"hidden":[159,191],"states":[160],"that":[164],"interpreted":[166],"extract":[168],"extent":[170],"contributions":[172],"acids":[175],"dictating":[177],"type":[179],"masked":[181],"ones.":[182],"The":[183],"fine-tuned":[184],"models":[185],"demonstrated":[186],"high":[187],"accuracy":[188],"predicting":[190],"within":[193,212],"addition,":[197],"analysis":[199],"embedding":[201],"was":[202],"performed":[203],"3D":[205],"structures":[206],"elucidate":[208],"higher-order":[210],"interactions":[211],"conformations":[214],"receptors.":[217]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2023-11-02T00:00:00"}
