{"id":"https://openalex.org/W4391561379","doi":"https://doi.org/10.1038/s42256-023-00788-1","title":"Leveraging large language models for predictive chemistry","display_name":"Leveraging large language models for predictive chemistry","publication_year":2024,"publication_date":"2024-02-06","ids":{"openalex":"https://openalex.org/W4391561379","doi":"https://doi.org/10.1038/s42256-023-00788-1"},"language":"en","primary_location":{"id":"doi:10.1038/s42256-023-00788-1","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-023-00788-1","pdf_url":"https://www.nature.com/articles/s42256-023-00788-1.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.nature.com/articles/s42256-023-00788-1.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027355573","display_name":"Kevin Maik Jablonka","orcid":"https://orcid.org/0000-0003-4894-4660"},"institutions":[{"id":"https://openalex.org/I1285013897","display_name":"Helmholtz Institute Jena","ror":"https://ror.org/02rzw6h69","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1285013897","https://openalex.org/I1305996414","https://openalex.org/I169556180"]},{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]},{"id":"https://openalex.org/I76198965","display_name":"Friedrich Schiller University Jena","ror":"https://ror.org/05qpz1x62","country_code":"DE","type":"education","lineage":["https://openalex.org/I76198965"]}],"countries":["CH","DE"],"is_corresponding":true,"raw_author_name":"Kevin Maik Jablonka","raw_affiliation_strings":["Center for Energy and Environmental Chemistry Jena (CEEC Jena), Friedrich Schiller University Jena, Jena, Germany","Helmholtz Institute for Polymers in Energy Applications, Jena, Germany","Laboratory of Molecular Simulation (LSMO), Institut des Sciences et Ing\u00e9nierie Chimiques, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Sion, Switzerland","Laboratory of Organic and Macromolecular Chemistry (IOMC), Friedrich Schiller University Jena, Jena, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Energy and Environmental Chemistry Jena (CEEC Jena), Friedrich Schiller University Jena, Jena, Germany","institution_ids":["https://openalex.org/I76198965"]},{"raw_affiliation_string":"Helmholtz Institute for Polymers in Energy Applications, Jena, Germany","institution_ids":["https://openalex.org/I1285013897"]},{"raw_affiliation_string":"Laboratory of Molecular Simulation (LSMO), Institut des Sciences et Ing\u00e9nierie Chimiques, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Sion, Switzerland","institution_ids":["https://openalex.org/I5124864"]},{"raw_affiliation_string":"Laboratory of Organic and Macromolecular Chemistry (IOMC), Friedrich Schiller University Jena, Jena, Germany","institution_ids":["https://openalex.org/I76198965"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028051805","display_name":"Philippe Schwaller","orcid":"https://orcid.org/0000-0003-3046-6576"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Philippe Schwaller","raw_affiliation_strings":["Laboratory of Artificial Chemical Intelligence (LIAC), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Lausanne, Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-3046-6576","affiliations":[{"raw_affiliation_string":"Laboratory of Artificial Chemical Intelligence (LIAC), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000326701","display_name":"Andres Ortega\u2010Guerrero","orcid":"https://orcid.org/0000-0002-0065-0623"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Andres Ortega-Guerrero","raw_affiliation_strings":["Laboratory of Molecular Simulation (LSMO), Institut des Sciences et Ing\u00e9nierie Chimiques, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Sion, Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-0065-0623","affiliations":[{"raw_affiliation_string":"Laboratory of Molecular Simulation (LSMO), Institut des Sciences et Ing\u00e9nierie Chimiques, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Sion, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075317126","display_name":"Berend Smit","orcid":"https://orcid.org/0000-0003-4653-8562"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Berend Smit","raw_affiliation_strings":["Laboratory of Molecular Simulation (LSMO), Institut des Sciences et Ing\u00e9nierie Chimiques, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Sion, Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-4653-8562","affiliations":[{"raw_affiliation_string":"Laboratory of Molecular Simulation (LSMO), Institut des Sciences et Ing\u00e9nierie Chimiques, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Sion, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5027355573"],"corresponding_institution_ids":["https://openalex.org/I1285013897","https://openalex.org/I5124864","https://openalex.org/I76198965"],"apc_list":{"value":9750,"currency":"EUR","value_usd":11690},"apc_paid":{"value":9750,"currency":"EUR","value_usd":11690},"fwci":34.1868,"has_fulltext":true,"cited_by_count":309,"citation_normalized_percentile":{"value":0.99983678,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"6","issue":"2","first_page":"161","last_page":"169"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.934499979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.43564170598983765},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.35705530643463135}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43564170598983765},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.35705530643463135}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1038/s42256-023-00788-1","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-023-00788-1","pdf_url":"https://www.nature.com/articles/s42256-023-00788-1.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:infoscience.epfl.ch:309094","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/205515","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"WoS","raw_type":"research article"}],"best_oa_location":{"id":"doi:10.1038/s42256-023-00788-1","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s42256-023-00788-1","pdf_url":"https://www.nature.com/articles/s42256-023-00788-1.pdf","source":{"id":"https://openalex.org/S2912241403","display_name":"Nature Machine Intelligence","issn_l":"2522-5839","issn":["2522-5839"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Nature Machine Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.800000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1120026416","display_name":null,"funder_award_id":"180544","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G4692142516","display_name":null,"funder_award_id":"51NF40-","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G5285085700","display_name":null,"funder_award_id":"182892","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G7691369275","display_name":null,"funder_award_id":"51NF40-182892","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310266","display_name":"Grantham Foundation for the Protection of the Environment","ror":"https://ror.org/04mm88136"},{"id":"https://openalex.org/F4320320915","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052"},{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4391561379.pdf"},"referenced_works_count":74,"referenced_works":["https://openalex.org/W1975147762","https://openalex.org/W1975875968","https://openalex.org/W2008505552","https://openalex.org/W2076498053","https://openalex.org/W2096541451","https://openalex.org/W2116105292","https://openalex.org/W2160592148","https://openalex.org/W2295598076","https://openalex.org/W2529996553","https://openalex.org/W2530805533","https://openalex.org/W2554191423","https://openalex.org/W2784918212","https://openalex.org/W2785942661","https://openalex.org/W2789949436","https://openalex.org/W2790960441","https://openalex.org/W2800793736","https://openalex.org/W2883583109","https://openalex.org/W2887447356","https://openalex.org/W2900694120","https://openalex.org/W2905012389","https://openalex.org/W2908350418","https://openalex.org/W2911997094","https://openalex.org/W2947423323","https://openalex.org/W2953641512","https://openalex.org/W2955219525","https://openalex.org/W2963026768","https://openalex.org/W2977044154","https://openalex.org/W2981852735","https://openalex.org/W2997100726","https://openalex.org/W3008588639","https://openalex.org/W3009321976","https://openalex.org/W3023402054","https://openalex.org/W3023937119","https://openalex.org/W3030068589","https://openalex.org/W3030978062","https://openalex.org/W3045928028","https://openalex.org/W3048908832","https://openalex.org/W3100220443","https://openalex.org/W3103092523","https://openalex.org/W3116783766","https://openalex.org/W3118507387","https://openalex.org/W3128429991","https://openalex.org/W3156578609","https://openalex.org/W3163360581","https://openalex.org/W3175955239","https://openalex.org/W3181860256","https://openalex.org/W3198449425","https://openalex.org/W4214868967","https://openalex.org/W4226050570","https://openalex.org/W4226145240","https://openalex.org/W4281619372","https://openalex.org/W4282053982","https://openalex.org/W4283031227","https://openalex.org/W4305016511","https://openalex.org/W4306179830","https://openalex.org/W4311281379","https://openalex.org/W4311409687","https://openalex.org/W4312129726","https://openalex.org/W4318952054","https://openalex.org/W4319310661","https://openalex.org/W4319996831","https://openalex.org/W4362664882","https://openalex.org/W4379184641","https://openalex.org/W4385571886","https://openalex.org/W4385572894","https://openalex.org/W4385671288","https://openalex.org/W4386168831","https://openalex.org/W6739901393","https://openalex.org/W6778883912","https://openalex.org/W6800751262","https://openalex.org/W6810081322","https://openalex.org/W6849941170","https://openalex.org/W6913078555","https://openalex.org/W6967534406"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W2948807893","https://openalex.org/W2778153218","https://openalex.org/W2748952813","https://openalex.org/W1531601525","https://openalex.org/W2078814861","https://openalex.org/W2527526854","https://openalex.org/W2062208111","https://openalex.org/W1976181487","https://openalex.org/W1986764834"],"abstract_inverted_index":{"Abstract":[0],"Machine":[1],"learning":[2,30,99,132,171],"has":[3,8],"transformed":[4],"many":[5,102],"fields":[6],"and":[7,14,75,109,156,175],"recently":[9],"found":[10,21],"applications":[11,103],"in":[12,22,73,85,134,136,172,205],"chemistry":[13,23,74],"materials":[15,76,110],"science.":[16],"The":[17,152],"small":[18,161],"datasets":[19],"commonly":[20],"sparked":[24],"the":[25,63,89,105,112,137,150,165,173,201],"development":[26],"of":[27,59,107,114,121,154],"sophisticated":[28],"machine":[29,98,131,170],"approaches":[31],"that":[32,49],"incorporate":[33],"chemical":[34,83,115,174],"knowledge":[35,203],"for":[36,101,160,214],"each":[37],"application":[38],"and,":[39],"therefore,":[40],"require":[41],"specialized":[42],"expertise":[43],"to":[44,69,81,111,126,168,180,195,210],"develop.":[45],"Here":[46],"we":[47,142],"show":[48],"GPT-3,":[50],"a":[51,181,185,192,197,212],"large":[52,187],"language":[53,87,188],"model":[54,189],"trained":[55],"on":[56],"vast":[57],"amounts":[58],"text":[60],"extracted":[61],"from":[62],"Internet,":[64],"can":[65,123,143,163],"easily":[66],"be":[67],"adapted":[68],"solve":[70],"various":[71],"tasks":[72],"science":[77],"by":[78,147,199],"fine-tuning":[79],"it":[80],"answer":[82],"questions":[84],"natural":[86],"with":[88,96],"correct":[90],"answer.":[91],"We":[92],"compared":[93],"this":[94],"approach":[95,167],"dedicated":[97],"models":[100],"spanning":[104],"properties":[106],"molecules":[108],"yield":[113],"reactions.":[116],"Surprisingly,":[117],"our":[118],"fine-tuned":[119],"version":[120],"GPT-3":[122],"perform":[124,144],"comparably":[125],"or":[127,209],"even":[128],"outperform":[129],"conventional":[130],"techniques,":[133],"particular":[135],"low-data":[138],"limit.":[139],"In":[140,178],"addition,":[141],"inverse":[145],"design":[146],"simply":[148],"inverting":[149],"questions.":[151],"ease":[153],"use":[155],"high":[157],"performance,":[158],"especially":[159],"datasets,":[162],"impact":[164],"fundamental":[166],"using":[169],"material":[176],"sciences.":[177],"addition":[179],"literature":[182],"search,":[183],"querying":[184],"pre-trained":[186],"might":[190],"become":[191],"routine":[193],"way":[194],"bootstrap":[196],"project":[198],"leveraging":[200],"collective":[202],"encoded":[204],"these":[206],"foundation":[207],"models,":[208],"provide":[211],"baseline":[213],"predictive":[215],"tasks.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":61},{"year":2025,"cited_by_count":174},{"year":2024,"cited_by_count":74}],"updated_date":"2026-05-13T08:25:38.343686","created_date":"2025-10-10T00:00:00"}
