{"id":"https://openalex.org/W4387023440","doi":"https://doi.org/10.1186/s13321-023-00759-z","title":"Probabilistic generative transformer language models for generative design of molecules","display_name":"Probabilistic generative transformer language models for generative design of molecules","publication_year":2023,"publication_date":"2023-09-25","ids":{"openalex":"https://openalex.org/W4387023440","doi":"https://doi.org/10.1186/s13321-023-00759-z","pmid":"https://pubmed.ncbi.nlm.nih.gov/37749655"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-023-00759-z","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00759-z","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00759-z","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00759-z","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025355763","display_name":"Lai Wei","orcid":"https://orcid.org/0000-0003-0344-8540"},"institutions":[{"id":"https://openalex.org/I155781252","display_name":"University of South Carolina","ror":"https://ror.org/02b6qw903","country_code":"US","type":"education","lineage":["https://openalex.org/I155781252"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lai Wei","raw_affiliation_strings":["Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA","institution_ids":["https://openalex.org/I155781252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048652419","display_name":"Nihang Fu","orcid":"https://orcid.org/0009-0005-9872-9530"},"institutions":[{"id":"https://openalex.org/I155781252","display_name":"University of South Carolina","ror":"https://ror.org/02b6qw903","country_code":"US","type":"education","lineage":["https://openalex.org/I155781252"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nihang Fu","raw_affiliation_strings":["Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA","institution_ids":["https://openalex.org/I155781252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039446470","display_name":"Yuqi Song","orcid":"https://orcid.org/0009-0000-8148-9212"},"institutions":[{"id":"https://openalex.org/I155781252","display_name":"University of South Carolina","ror":"https://ror.org/02b6qw903","country_code":"US","type":"education","lineage":["https://openalex.org/I155781252"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuqi Song","raw_affiliation_strings":["Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA","institution_ids":["https://openalex.org/I155781252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100391049","display_name":"Qian Wang","orcid":"https://orcid.org/0000-0002-2812-609X"},"institutions":[{"id":"https://openalex.org/I155781252","display_name":"University of South Carolina","ror":"https://ror.org/02b6qw903","country_code":"US","type":"education","lineage":["https://openalex.org/I155781252"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qian Wang","raw_affiliation_strings":["Department of Chemistry and Biochemistry, University of South Carolina, Columbia, SC, 29201, USA"],"affiliations":[{"raw_affiliation_string":"Department of Chemistry and Biochemistry, University of South Carolina, Columbia, SC, 29201, USA","institution_ids":["https://openalex.org/I155781252"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060537711","display_name":"Jianjun Hu","orcid":"https://orcid.org/0000-0002-8725-6660"},"institutions":[{"id":"https://openalex.org/I155781252","display_name":"University of South Carolina","ror":"https://ror.org/02b6qw903","country_code":"US","type":"education","lineage":["https://openalex.org/I155781252"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jianjun Hu","raw_affiliation_strings":["Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA. jianjunh@cse.sc.edu","Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA. jianjunh@cse.sc.edu","institution_ids":[]},{"raw_affiliation_string":"Department of Computer Science and Engineering, University of South Carolina, Columbia, SC, 29201, USA","institution_ids":["https://openalex.org/I155781252"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5025355763"],"corresponding_institution_ids":["https://openalex.org/I155781252"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":1.7431,"has_fulltext":true,"cited_by_count":18,"citation_normalized_percentile":{"value":0.84095357,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"15","issue":"1","first_page":"88","last_page":"88"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.7870946526527405},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7227433919906616},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6889809370040894},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6101680994033813},{"id":"https://openalex.org/keywords/generative-design","display_name":"Generative Design","score":0.5783739686012268},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5337461233139038},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42511457204818726},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.42000800371170044},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.361247718334198},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12547940015792847},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.07605531811714172}],"concepts":[{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.7870946526527405},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7227433919906616},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6889809370040894},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6101680994033813},{"id":"https://openalex.org/C184408114","wikidata":"https://www.wikidata.org/wiki/Q1502022","display_name":"Generative Design","level":3,"score":0.5783739686012268},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5337461233139038},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42511457204818726},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.42000800371170044},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.361247718334198},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12547940015792847},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.07605531811714172},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13321-023-00759-z","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00759-z","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00759-z","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:37749655","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37749655","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:10518939","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/10518939","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC10518939/pdf/13321_2023_Article_759.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:3f30fcee6ff54da3b8668c8300d5af37","is_oa":true,"landing_page_url":"https://doaj.org/article/3f30fcee6ff54da3b8668c8300d5af37","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 15, Iss 1, Pp 1-15 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13321-023-00759-z","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00759-z","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00759-z","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7599999904632568,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387023440.pdf"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W1975147762","https://openalex.org/W1981276685","https://openalex.org/W1988037271","https://openalex.org/W2022476850","https://openalex.org/W2034549041","https://openalex.org/W2037825667","https://openalex.org/W2060531713","https://openalex.org/W2076809861","https://openalex.org/W2763220183","https://openalex.org/W2794994220","https://openalex.org/W2887447356","https://openalex.org/W2900694120","https://openalex.org/W2909240409","https://openalex.org/W2953128081","https://openalex.org/W2956961449","https://openalex.org/W3009321976","https://openalex.org/W3011847211","https://openalex.org/W3036527662","https://openalex.org/W3045928028","https://openalex.org/W3094686696","https://openalex.org/W3099414221","https://openalex.org/W3103753836","https://openalex.org/W3104088487","https://openalex.org/W3104956673","https://openalex.org/W3116865743","https://openalex.org/W3121311390","https://openalex.org/W3138781613","https://openalex.org/W3165171933","https://openalex.org/W3195604886","https://openalex.org/W4213349615","https://openalex.org/W4229590462","https://openalex.org/W4281619372","https://openalex.org/W4283071220","https://openalex.org/W4308510017","https://openalex.org/W4313485929","https://openalex.org/W4319310661","https://openalex.org/W4361198756","https://openalex.org/W4380225176","https://openalex.org/W4401339550","https://openalex.org/W6601013545","https://openalex.org/W6601022194","https://openalex.org/W6603707631","https://openalex.org/W6609581451","https://openalex.org/W6610423178","https://openalex.org/W6702248584","https://openalex.org/W6764431594"],"related_works":["https://openalex.org/W775311126","https://openalex.org/W1517876498","https://openalex.org/W4300030714","https://openalex.org/W2489288131","https://openalex.org/W4365211920","https://openalex.org/W4317695495","https://openalex.org/W4380551139","https://openalex.org/W4388137171","https://openalex.org/W1967909251","https://openalex.org/W4283803360"],"abstract_inverted_index":{"Self-supervised":[0],"neural":[1,71],"language":[2,87],"models":[3,38,117],"have":[4,48,131],"recently":[5],"found":[6],"wide":[7],"applications":[8],"in":[9,99,134],"the":[10,34,64,84,101,113,132,154],"generative":[11,75],"design":[12,41,59,76,138],"of":[13,33,77,143],"organic":[14],"molecules":[15,149],"and":[16,28,47,108,121,162],"protein":[17],"sequences":[18],"as":[19,21],"well":[20],"representation":[22],"learning":[23,37,100],"for":[24,39,74,91],"downstream":[25],"structure":[26],"classification":[27],"functional":[29],"prediction.":[30],"However,":[31],"most":[32],"existing":[35,148],"deep":[36],"molecule":[40,137,157],"usually":[42],"require":[43],"a":[44,49,69],"big":[45],"dataset":[46],"black-box":[50],"architecture,":[51],"which":[52,94],"makes":[53],"it":[54],"difficult":[55],"to":[56,124,140,146],"interpret":[57],"their":[58,141],"logic.":[60],"Here":[61],"we":[62],"propose":[63],"Generative":[65],"Molecular":[66],"Transformer":[67],"(GMTransformer),":[68],"probabilistic":[70,128],"network":[72],"model":[73,80,88],"molecules.":[78],"Our":[79],"is":[81],"built":[82],"on":[83,112],"blank":[85],"filling":[86],"originally":[89],"developed":[90],"text":[92],"processing,":[93],"has":[95],"demonstrated":[96],"unique":[97],"advantages":[98],"\"molecules":[102],"grammars\"":[103],"with":[104,136,150],"high-quality":[105],"generation,":[106],"interpretability,":[107],"data":[109],"efficiency.":[110],"Benchmarked":[111],"MOSES":[114],"datasets,":[115],"our":[116],"achieve":[118],"high":[119],"novelty":[120],"Scaf":[122],"compared":[123],"other":[125],"baselines.":[126],"The":[127,159],"generation":[129],"steps":[130],"potential":[133],"tinkering":[135],"due":[139],"capability":[142],"recommending":[144],"how":[145],"modify":[147],"explanation,":[151],"guided":[152],"by":[153],"learned":[155],"implicit":[156],"chemistry.":[158],"source":[160],"code":[161],"datasets":[163],"can":[164],"be":[165],"accessed":[166],"freely":[167],"at":[168],"https://github.com/usccolumbia/GMTransformer.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":7}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
