{"id":"https://openalex.org/W4394998126","doi":"https://doi.org/10.1007/s10822-024-00559-z","title":"De novo drug design as GPT language modeling: large chemistry models with supervised and reinforcement learning","display_name":"De novo drug design as GPT language modeling: large chemistry models with supervised and reinforcement learning","publication_year":2024,"publication_date":"2024-04-22","ids":{"openalex":"https://openalex.org/W4394998126","doi":"https://doi.org/10.1007/s10822-024-00559-z","pmid":"https://pubmed.ncbi.nlm.nih.gov/38647700"},"language":"en","primary_location":{"id":"doi:10.1007/s10822-024-00559-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10822-024-00559-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10822-024-00559-z.pdf","source":{"id":"https://openalex.org/S64621741","display_name":"Journal of Computer-Aided Molecular Design","issn_l":"0920-654X","issn":["0920-654X","1573-4951"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computer-Aided Molecular Design","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10822-024-00559-z.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108921770","display_name":"Gavin Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I4210154783","display_name":"Grammar School","ror":"https://ror.org/053g7ab72","country_code":"SK","type":"education","lineage":["https://openalex.org/I4210154783"]},{"id":"https://openalex.org/I2799496773","display_name":"University Prep","ror":"https://ror.org/056hgyx35","country_code":"US","type":"education","lineage":["https://openalex.org/I2799496773"]}],"countries":["SK","US"],"is_corresponding":true,"raw_author_name":"Gavin Ye","raw_affiliation_strings":["Columbia Grammar & Preparatory School, New York, NY, USA. yeeeyee004@gmail.com","Columbia Grammar & Preparatory School, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Columbia Grammar & Preparatory School, New York, NY, USA. yeeeyee004@gmail.com","institution_ids":["https://openalex.org/I4210154783"]},{"raw_affiliation_string":"Columbia Grammar & Preparatory School, New York, NY, USA","institution_ids":["https://openalex.org/I2799496773"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5108921770"],"corresponding_institution_ids":["https://openalex.org/I2799496773","https://openalex.org/I4210154783"],"apc_list":{"value":2890,"currency":"EUR","value_usd":3690},"apc_paid":{"value":2890,"currency":"EUR","value_usd":3690},"fwci":8.2928,"has_fulltext":true,"cited_by_count":23,"citation_normalized_percentile":{"value":0.98097911,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"38","issue":"1","first_page":"20","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7021470665931702},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6932797431945801},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5358802676200867},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5271527171134949},{"id":"https://openalex.org/keywords/drug-discovery","display_name":"Drug discovery","score":0.5032557845115662},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.499436616897583},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.1885630190372467}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7021470665931702},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6932797431945801},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5358802676200867},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5271527171134949},{"id":"https://openalex.org/C74187038","wikidata":"https://www.wikidata.org/wiki/Q1418791","display_name":"Drug discovery","level":2,"score":0.5032557845115662},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.499436616897583},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.1885630190372467},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069553","descriptor_name":"Supervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069553","descriptor_name":"Supervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069553","descriptor_name":"Supervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069553","descriptor_name":"Supervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069553","descriptor_name":"Supervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069553","descriptor_name":"Supervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069553","descriptor_name":"Supervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008956","descriptor_name":"Models, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008956","descriptor_name":"Models, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008956","descriptor_name":"Models, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008956","descriptor_name":"Models, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008956","descriptor_name":"Models, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008956","descriptor_name":"Models, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008956","descriptor_name":"Models, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015195","descriptor_name":"Drug Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015195","descriptor_name":"Drug Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015195","descriptor_name":"Drug Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015195","descriptor_name":"Drug Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015195","descriptor_name":"Drug Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015195","descriptor_name":"Drug Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015195","descriptor_name":"Drug Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1007/s10822-024-00559-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10822-024-00559-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10822-024-00559-z.pdf","source":{"id":"https://openalex.org/S64621741","display_name":"Journal of Computer-Aided Molecular Design","issn_l":"0920-654X","issn":["0920-654X","1573-4951"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computer-Aided Molecular Design","raw_type":"journal-article"},{"id":"pmid:38647700","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38647700","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of computer-aided molecular design","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:11035455","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11035455","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11035455/pdf/10822_2024_Article_559.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Comput Aided Mol Des","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1007/s10822-024-00559-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10822-024-00559-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10822-024-00559-z.pdf","source":{"id":"https://openalex.org/S64621741","display_name":"Journal of Computer-Aided Molecular Design","issn_l":"0920-654X","issn":["0920-654X","1573-4951"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computer-Aided Molecular Design","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4394998126.pdf"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W1975147762","https://openalex.org/W2037493760","https://openalex.org/W2096864392","https://openalex.org/W2169303530","https://openalex.org/W2273267066","https://openalex.org/W2773987374","https://openalex.org/W2777416523","https://openalex.org/W2909063104","https://openalex.org/W2949676527","https://openalex.org/W3009321976","https://openalex.org/W3013657235","https://openalex.org/W3043969542","https://openalex.org/W3045928028","https://openalex.org/W3047278342","https://openalex.org/W3100751385","https://openalex.org/W3109549311","https://openalex.org/W3161951461","https://openalex.org/W3212496002","https://openalex.org/W4226278401","https://openalex.org/W4280597794","https://openalex.org/W4283586469","https://openalex.org/W4292779060","https://openalex.org/W4300961340","https://openalex.org/W4307468223","https://openalex.org/W4308043267","https://openalex.org/W4308068572","https://openalex.org/W4321392130","https://openalex.org/W4362664882","https://openalex.org/W4379468083","https://openalex.org/W4384918448","https://openalex.org/W4387692254","https://openalex.org/W4389157038"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"In":[0,84],"recent":[1],"years,":[2],"generative":[3],"machine":[4],"learning":[5],"algorithms":[6],"have":[7,50,59],"been":[8,62],"successful":[9],"in":[10,21,79,186],"designing":[11],"innovative":[12],"drug-like":[13],"molecules.":[14,224,243],"SMILES":[15,131],"is":[16,82,91],"a":[17,69,94],"sequence-like":[18],"language":[19,48,96],"used":[20,112],"most":[22],"effective":[23],"drug":[24,57,80,88,118,153,187,202,242],"design":[25,41,58,89,154,228],"models.":[26],"Due":[27],"to":[28,76,113,117,121,145,201,219,227,241],"data's":[29],"sequential":[30],"structure,":[31],"models":[32,49],"such":[33],"as":[34,93],"recurrent":[35],"neural":[36],"networks":[37],"and":[38,107,125,178,221],"transformers":[39],"can":[40],"pharmacological":[42],"compounds":[43],"with":[44,133,158,170,189,212],"optimized":[45],"efficacy.":[46],"Large":[47],"advanced":[51],"recently,":[52],"but":[53],"their":[54],"implications":[55],"on":[56],"not":[60],"yet":[61],"explored.":[63],"Although":[64],"one":[65],"study":[66],"successfully":[67],"pre-trained":[68],"large":[70],"chemistry":[71],"model":[72,140,155],"(LCM),":[73],"its":[74,142],"application":[75],"specific":[77],"tasks":[78],"discovery":[81,203],"unknown.":[83],"this":[85],"study,":[86],"the":[87,100,115,130,136,152,166,173,182,205,234],"task":[90],"modeled":[92],"causal":[95],"modeling":[97],"problem.":[98],"Thus,":[99],"procedure":[101],"of":[102,172,184,199],"reward":[103],"modeling,":[104],"supervised":[105],"fine-tuning,":[106],"proximal":[108,149],"policy":[109,150],"optimization":[110],"was":[111],"transfer":[114],"LCM":[116],"design,":[119],"similar":[120,229],"Open":[122],"AI's":[123],"ChatGPT":[124],"InstructGPT":[126],"procedures.":[127],"By":[128],"combining":[129],"sequence":[132],"chemical":[134],"descriptors,":[135],"novel":[137],"efficacy":[138,161],"evaluation":[139],"exceeded":[141],"performance":[143],"compared":[144],"previous":[146],"studies.":[147],"After":[148],"optimization,":[151],"generated":[156,174],"molecules":[157,175,230],"99.2%":[159],"having":[160],"pIC<sub>50</sub>":[162],">":[163],"7":[164],"towards":[165],"amyloid":[167],"precursor":[168],"protein,":[169],"100%":[171],"being":[176],"valid":[177],"novel.":[179],"This":[180],"demonstrated":[181],"applicability":[183,198],"LCMs":[185,200,220],"discovery,":[188],"benefits":[190],"including":[191],"less":[192],"data":[193],"consumption":[194],"while":[195],"fine-tuning.":[196],"The":[197],"opens":[204],"door":[206],"for":[207,236],"larger":[208],"studies":[209],"involving":[210],"reinforcement-learning":[211],"human":[213],"feedback,":[214],"where":[215],"chemists":[216],"provide":[217],"feedback":[218],"generate":[222],"higher-quality":[223],"LCMs'":[225],"ability":[226],"from":[231],"datasets":[232],"paves":[233],"way":[235],"more":[237],"accessible,":[238],"non-patented":[239],"alternatives":[240]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-31T07:56:22.981413","created_date":"2025-10-10T00:00:00"}
