{"id":"https://openalex.org/W7133508051","doi":"https://doi.org/10.1021/acs.jcim.5c03062","title":"Improving Fidelity and Diversity in Chemical Language Transformers for Inverse Molecular Design","display_name":"Improving Fidelity and Diversity in Chemical Language Transformers for Inverse Molecular Design","publication_year":2026,"publication_date":"2026-03-04","ids":{"openalex":"https://openalex.org/W7133508051","doi":"https://doi.org/10.1021/acs.jcim.5c03062","pmid":"https://pubmed.ncbi.nlm.nih.gov/41777180"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.5c03062","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c03062","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.manchester.ac.uk/en/publications/30ef8b7f-affb-4a09-ac6a-7f591850d5d3","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041541416","display_name":"Alexander W. Rogers","orcid":"https://orcid.org/0000-0003-2298-6520"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alexander W. Rogers","raw_affiliation_strings":["Department of Chemical Engineering, The University of Manchester, Oxford Road, Manchester M1 3AL, U.K"],"raw_orcid":"https://orcid.org/0000-0003-2298-6520","affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering, The University of Manchester, Oxford Road, Manchester M1 3AL, U.K","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084067191","display_name":"Ruediger Zillmer","orcid":null},"institutions":[{"id":"https://openalex.org/I1342131907","display_name":"Unilever (United Kingdom)","ror":"https://ror.org/05n8ah907","country_code":"GB","type":"company","lineage":["https://openalex.org/I1342131907"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ruediger Zillmer","raw_affiliation_strings":["Unilever R&D Port Sunlight, Bromborough Road, Bebington, Wirral CH63 3JW, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unilever R&D Port Sunlight, Bromborough Road, Bebington, Wirral CH63 3JW, U.K","institution_ids":["https://openalex.org/I1342131907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050029481","display_name":"Amanda Lane","orcid":null},"institutions":[{"id":"https://openalex.org/I1342131907","display_name":"Unilever (United Kingdom)","ror":"https://ror.org/05n8ah907","country_code":"GB","type":"company","lineage":["https://openalex.org/I1342131907"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Amanda Lane","raw_affiliation_strings":["Unilever R&D Port Sunlight, Bromborough Road, Bebington, Wirral CH63 3JW, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unilever R&D Port Sunlight, Bromborough Road, Bebington, Wirral CH63 3JW, U.K","institution_ids":["https://openalex.org/I1342131907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128084559","display_name":"Adam Kowalski","orcid":null},"institutions":[{"id":"https://openalex.org/I1342131907","display_name":"Unilever (United Kingdom)","ror":"https://ror.org/05n8ah907","country_code":"GB","type":"company","lineage":["https://openalex.org/I1342131907"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Adam Kowalski","raw_affiliation_strings":["Unilever R&D Port Sunlight, Bromborough Road, Bebington, Wirral CH63 3JW, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unilever R&D Port Sunlight, Bromborough Road, Bebington, Wirral CH63 3JW, U.K","institution_ids":["https://openalex.org/I1342131907"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060238679","display_name":"Dongda Zhang","orcid":"https://orcid.org/0000-0001-5956-4618"},"institutions":[{"id":"https://openalex.org/I1342131907","display_name":"Unilever (United Kingdom)","ror":"https://ror.org/05n8ah907","country_code":"GB","type":"company","lineage":["https://openalex.org/I1342131907"]},{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Dongda Zhang","raw_affiliation_strings":["Department of Chemical Engineering, The University of Manchester, Oxford Road, Manchester M1 3AL, U.K","Unilever R&D Port Sunlight, Bromborough Road, Bebington, Wirral CH63 3JW, U.K"],"raw_orcid":"https://orcid.org/0000-0001-5956-4618","affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering, The University of Manchester, Oxford Road, Manchester M1 3AL, U.K","institution_ids":["https://openalex.org/I28407311"]},{"raw_affiliation_string":"Unilever R&D Port Sunlight, Bromborough Road, Bebington, Wirral CH63 3JW, U.K","institution_ids":["https://openalex.org/I1342131907"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5060238679"],"corresponding_institution_ids":["https://openalex.org/I1342131907","https://openalex.org/I28407311"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.41751583,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"66","issue":"6","first_page":"3059","last_page":"3073"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6888999938964844,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6888999938964844,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.23829999566078186,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10911","display_name":"Chemical Synthesis and Analysis","score":0.01360000018030405,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7797999978065491},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.6128000020980835},{"id":"https://openalex.org/keywords/inverse","display_name":"Inverse","score":0.538100004196167},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.43220001459121704},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4171000123023987},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.41429999470710754},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.38109999895095825}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7797999978065491},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7160999774932861},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.6128000020980835},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.538100004196167},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.43220001459121704},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4171000123023987},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.41429999470710754},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.38109999895095825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37220001220703125},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.3483000099658966},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34380000829696655},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.335099995136261},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.32269999384880066},{"id":"https://openalex.org/C135252773","wikidata":"https://www.wikidata.org/wiki/Q1567213","display_name":"Inverse problem","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C2983319462","wikidata":"https://www.wikidata.org/wiki/Q15948129","display_name":"Inverse method","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C124223222","wikidata":"https://www.wikidata.org/wiki/Q2281940","display_name":"Chemical process","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.288100004196167},{"id":"https://openalex.org/C1893757","wikidata":"https://www.wikidata.org/wiki/Q3653001","display_name":"Inversion (geology)","level":3,"score":0.26899999380111694},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.25110000371932983}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008956","descriptor_name":"Models, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015195","descriptor_name":"Drug Design","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1021/acs.jcim.5c03062","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c03062","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:41777180","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41777180","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/30ef8b7f-affb-4a09-ac6a-7f591850d5d3","is_oa":true,"landing_page_url":"https://research.manchester.ac.uk/en/publications/30ef8b7f-affb-4a09-ac6a-7f591850d5d3","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Rogers, A W, Zillmer, R, Lane, A, Kowalski, A & Zhang, D 2026, 'Improving Fidelity and Diversity in Chemical Language Transformers for Inverse Molecular Design', Journal of Chemical Information and Modeling, vol. 66, no. 6, pp. 3059-3073. https://doi.org/10.1021/acs.jcim.5c03062","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/30ef8b7f-affb-4a09-ac6a-7f591850d5d3","is_oa":true,"landing_page_url":"https://research.manchester.ac.uk/en/publications/30ef8b7f-affb-4a09-ac6a-7f591850d5d3","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Rogers, A W, Zillmer, R, Lane, A, Kowalski, A & Zhang, D 2026, 'Improving Fidelity and Diversity in Chemical Language Transformers for Inverse Molecular Design', Journal of Chemical Information and Modeling, vol. 66, no. 6, pp. 3059-3073. https://doi.org/10.1021/acs.jcim.5c03062","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320313276","display_name":"Society of Chemical Industry","ror":"https://ror.org/05p56yz34"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W1220159786","https://openalex.org/W1520121841","https://openalex.org/W1975147762","https://openalex.org/W2022476850","https://openalex.org/W2073503722","https://openalex.org/W2151697120","https://openalex.org/W2160592148","https://openalex.org/W2503348119","https://openalex.org/W2529996553","https://openalex.org/W2578240541","https://openalex.org/W2606780347","https://openalex.org/W2610148085","https://openalex.org/W2763421725","https://openalex.org/W2773987374","https://openalex.org/W2799620402","https://openalex.org/W2901476322","https://openalex.org/W2903564615","https://openalex.org/W2908510526","https://openalex.org/W2909063104","https://openalex.org/W2956961449","https://openalex.org/W2962851944","https://openalex.org/W2964113829","https://openalex.org/W2973074478","https://openalex.org/W2980282514","https://openalex.org/W2985931096","https://openalex.org/W2997704445","https://openalex.org/W3009321976","https://openalex.org/W3010145447","https://openalex.org/W3043969542","https://openalex.org/W3093934881","https://openalex.org/W3110901318","https://openalex.org/W3165171933","https://openalex.org/W3168867926","https://openalex.org/W3195604886","https://openalex.org/W3202868527","https://openalex.org/W4214653918","https://openalex.org/W4225000967","https://openalex.org/W4226078808","https://openalex.org/W4253343410","https://openalex.org/W4281262113","https://openalex.org/W4291186635","https://openalex.org/W4292720494","https://openalex.org/W4293861706","https://openalex.org/W4317566456","https://openalex.org/W4320728073","https://openalex.org/W4366769286","https://openalex.org/W4383955629","https://openalex.org/W4385245566","https://openalex.org/W4388765324","https://openalex.org/W4388855368","https://openalex.org/W4390221451","https://openalex.org/W4391088785","https://openalex.org/W4391272197","https://openalex.org/W4391881459","https://openalex.org/W4393382554","https://openalex.org/W4400273590","https://openalex.org/W4402458626","https://openalex.org/W4403963670","https://openalex.org/W4404347348","https://openalex.org/W4404511389","https://openalex.org/W4405577466","https://openalex.org/W4405613539","https://openalex.org/W4409183464","https://openalex.org/W4411160688","https://openalex.org/W4412826771","https://openalex.org/W6947997424"],"related_works":[],"abstract_inverted_index":{"Rapid,":[0],"sustainable":[1],"redesign":[2],"of":[3,10,139,195],"large":[4,115],"functional":[5,197],"molecules":[6,143],"demands":[7],"efficient":[8,186],"exploration":[9],"vast":[11],"chemical":[12],"spaces.":[13],"Chemical":[14],"language":[15],"models":[16],"(CLMs),":[17],"especially":[18],"transformers,":[19],"learn":[20],"long-range":[21],"structure-property":[22],"relations":[23],"and":[24,82,93,113,125,141,187],"enable":[25],"swift,":[26],"batched":[27],"candidate":[28],"generation":[29,45],"after":[30],"training.":[31],"However,":[32],"inverse":[33,62,193],"molecular":[34,161,178],"design":[35,63,167,194],"is":[36],"often":[37,46],"ill-posed\u2500many":[38],"structures":[39,162],"can":[40],"meet":[41],"a":[42,59,76,121,126,136,147],"target\u2500and":[43],"conditioned":[44],"decodes":[47],"to":[48,80,97,152,164,173,191],"invalid":[49,99],"or":[50],"off-spec":[51],"molecules.":[52,198],"To":[53,101],"address":[54],"this":[55],"challenge,":[56],"we":[57,88,105],"propose":[58],"novel":[60,196],"CLM-based":[61],"framework":[64,134,183],"that":[65,132,158],"optimizes":[66],"latent":[67],"representations":[68],"toward":[69],"desired":[70],"target":[71,106,148],"properties.":[72],"Our":[73],"approach":[74],"introduces":[75],"round-trip":[77],"fidelity":[78],"metric":[79],"quantify":[81],"diagnose":[83],"decoder-induced":[84],"latent-space":[85],"drift,":[86],"which":[87],"mitigate":[89],"via":[90],"postdecoding":[91],"re-ranking":[92],"predictor-guided":[94],"minimal-edit":[95],"repair":[96],"correct":[98],"structures.":[100],"demonstrate":[102],"the":[103,107,159,170,181,192],"framework,":[104],"surfactant":[108],"critical":[109],"micelle":[110],"concentration":[111],"(CMC)":[112],"compare":[114],"pretrained":[116],"CLMs,":[117],"our":[118,133],"lightweight":[119],"CLM,":[120],"fragment-based":[122],"genetic":[123],"algorithm,":[124],"prompt-conditioned":[127],"ChatGPT":[128],"baseline.":[129],"We":[130],"observe":[131],"yields":[135],"high":[137],"proportion":[138],"valid":[140],"diverse":[142],"(\u223c90%)":[144],"while":[145],"maintaining":[146],"property":[149],"error":[150],"close":[151],"1%.":[153],"Moreover,":[154],"interpretability":[155],"analysis":[156],"confirms":[157],"designed":[160],"adhere":[163],"established":[165],"physical":[166,175],"rules,":[168],"highlighting":[169],"framework's":[171],"ability":[172],"extract":[174],"insights":[176],"for":[177],"design.":[179],"Therefore,":[180],"current":[182],"provides":[184],"an":[185],"broadly":[188],"applicable":[189],"solution":[190]},"counts_by_year":[],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2026-03-05T00:00:00"}
