{"id":"https://openalex.org/W4415693877","doi":"https://doi.org/10.1186/s13321-025-01079-0","title":"Measuring Chemical\u00a0LLM robustness to molecular representations: a SMILES variation-based framework","display_name":"Measuring Chemical\u00a0LLM robustness to molecular representations: a SMILES variation-based framework","publication_year":2025,"publication_date":"2025-10-30","ids":{"openalex":"https://openalex.org/W4415693877","doi":"https://doi.org/10.1186/s13321-025-01079-0","pmid":"https://pubmed.ncbi.nlm.nih.gov/41168806"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-025-01079-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-025-01079-0","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-025-01079-0","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-025-01079-0","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009708250","display_name":"Veronika Ganeeva","orcid":null},"institutions":[{"id":"https://openalex.org/I1315649409","display_name":"Institute on Taxation and Economic Policy","ror":"https://ror.org/00w9dm031","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1315649409"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Veronika Ganeeva","raw_affiliation_strings":["AIRI, 6 Presnenskaya, Moscow, Russia, 123112"],"affiliations":[{"raw_affiliation_string":"AIRI, 6 Presnenskaya, Moscow, Russia, 123112","institution_ids":["https://openalex.org/I1315649409"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074301645","display_name":"Kuzma Khrabrov","orcid":null},"institutions":[{"id":"https://openalex.org/I1315649409","display_name":"Institute on Taxation and Economic Policy","ror":"https://ror.org/00w9dm031","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1315649409"]},{"id":"https://openalex.org/I4210127092","display_name":"The State Scientific Research Institute of Civil Aviation","ror":"https://ror.org/034ahxs15","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210127092"]}],"countries":["RU","US"],"is_corresponding":false,"raw_author_name":"Kuzma Khrabrov","raw_affiliation_strings":["AIRI, 6 Presnenskaya, Moscow, Russia, 123112. khrabrov@airi.net","AIRI, 6 Presnenskaya, Moscow, Russia, 123112"],"affiliations":[{"raw_affiliation_string":"AIRI, 6 Presnenskaya, Moscow, Russia, 123112. khrabrov@airi.net","institution_ids":["https://openalex.org/I4210127092"]},{"raw_affiliation_string":"AIRI, 6 Presnenskaya, Moscow, Russia, 123112","institution_ids":["https://openalex.org/I1315649409"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019867253","display_name":"Artur Kadurin","orcid":"https://orcid.org/0000-0003-1482-9365"},"institutions":[{"id":"https://openalex.org/I1315649409","display_name":"Institute on Taxation and Economic Policy","ror":"https://ror.org/00w9dm031","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1315649409"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Artur Kadurin","raw_affiliation_strings":["AIRI, 6 Presnenskaya, Moscow, Russia, 123112","ISP RAS Research Center for Trusted Artificial Intelligence, Moscow, Russia"],"affiliations":[{"raw_affiliation_string":"AIRI, 6 Presnenskaya, Moscow, Russia, 123112","institution_ids":["https://openalex.org/I1315649409"]},{"raw_affiliation_string":"ISP RAS Research Center for Trusted Artificial Intelligence, Moscow, Russia","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012311258","display_name":"Elena Tutubalina","orcid":"https://orcid.org/0000-0001-7936-0284"},"institutions":[{"id":"https://openalex.org/I103031861","display_name":"Plekhanov Russian University of Economics","ror":"https://ror.org/04pbtsc74","country_code":"RU","type":"education","lineage":["https://openalex.org/I103031861"]},{"id":"https://openalex.org/I118501908","display_name":"National Research University Higher School of Economics","ror":"https://ror.org/055f7t516","country_code":"RU","type":"education","lineage":["https://openalex.org/I118501908"]},{"id":"https://openalex.org/I1315649409","display_name":"Institute on Taxation and Economic Policy","ror":"https://ror.org/00w9dm031","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1315649409"]}],"countries":["RU","US"],"is_corresponding":false,"raw_author_name":"Elena Tutubalina","raw_affiliation_strings":["AIRI, 6 Presnenskaya, Moscow, Russia, 123112","ISP RAS Research Center for Trusted Artificial Intelligence, Moscow, Russia","National Research University Higher School of Economics (HSE University), 11 Pokrovksy Bulvar, Moscow, Russia, 109028","Sber AI, Kutuzovsky Prospekt, 32, 121170, Moscow, Russia"],"affiliations":[{"raw_affiliation_string":"AIRI, 6 Presnenskaya, Moscow, Russia, 123112","institution_ids":["https://openalex.org/I1315649409"]},{"raw_affiliation_string":"ISP RAS Research Center for Trusted Artificial Intelligence, Moscow, Russia","institution_ids":[]},{"raw_affiliation_string":"National Research University Higher School of Economics (HSE University), 11 Pokrovksy Bulvar, Moscow, Russia, 109028","institution_ids":["https://openalex.org/I118501908"]},{"raw_affiliation_string":"Sber AI, Kutuzovsky Prospekt, 32, 121170, Moscow, Russia","institution_ids":["https://openalex.org/I103031861"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5009708250"],"corresponding_institution_ids":["https://openalex.org/I1315649409"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":2.84,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.92668165,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"17","issue":"1","first_page":"164","last_page":"164"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.4779999852180481,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.4779999852180481,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.29330000281333923,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.08829999715089798,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7832000255584717},{"id":"https://openalex.org/keywords/cheminformatics","display_name":"Cheminformatics","score":0.7495999932289124},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5758000016212463},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.47929999232292175},{"id":"https://openalex.org/keywords/chemical-similarity","display_name":"Chemical similarity","score":0.43799999356269836},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43459999561309814},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4246000051498413}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8070999979972839},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7832000255584717},{"id":"https://openalex.org/C68762167","wikidata":"https://www.wikidata.org/wiki/Q910164","display_name":"Cheminformatics","level":2,"score":0.7495999932289124},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5758000016212463},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.47929999232292175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46720001101493835},{"id":"https://openalex.org/C24259465","wikidata":"https://www.wikidata.org/wiki/Q2272153","display_name":"Chemical similarity","level":3,"score":0.43799999356269836},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43459999561309814},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4246000051498413},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4002000093460083},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3716999888420105},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36579999327659607},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3492000102996826},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32850000262260437},{"id":"https://openalex.org/C2779587451","wikidata":"https://www.wikidata.org/wiki/Q898710","display_name":"Smiles rearrangement","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C81147070","wikidata":"https://www.wikidata.org/wiki/Q1172449","display_name":"Encapsulation (networking)","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C124223222","wikidata":"https://www.wikidata.org/wiki/Q2281940","display_name":"Chemical process","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.25589999556541443},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.25209999084472656},{"id":"https://openalex.org/C203394866","wikidata":"https://www.wikidata.org/wiki/Q2881060","display_name":"Chemical database","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13321-025-01079-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-025-01079-0","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-025-01079-0","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:41168806","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41168806","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:3fdd7bf4bbec40b4a0a360c268e43a13","is_oa":true,"landing_page_url":"https://doaj.org/article/3fdd7bf4bbec40b4a0a360c268e43a13","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 17, Iss 1, Pp 1-15 (2025)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:11383826","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12574305","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s13321-025-01079-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-025-01079-0","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-025-01079-0","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3014329568","display_name":null,"funder_award_id":"23-11-00358","funder_id":"https://openalex.org/F4320324099","funder_display_name":"Russian Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320324099","display_name":"Russian Science Foundation","ror":"https://ror.org/03y2gwe85"},{"id":"https://openalex.org/F4320324261","display_name":"National Research University Higher School of Economics","ror":"https://ror.org/055f7t516"},{"id":"https://openalex.org/F4320324632","display_name":"Ministero dello Sviluppo Economico","ror":"https://ror.org/011z3ff80"},{"id":"https://openalex.org/F4320334801","display_name":"Russian Academy of Sciences","ror":"https://ror.org/05qrfxd25"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415693877.pdf","grobid_xml":"https://content.openalex.org/works/W4415693877.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W1757990252","https://openalex.org/W1975147762","https://openalex.org/W2080635178","https://openalex.org/W2096541451","https://openalex.org/W2101105183","https://openalex.org/W2114704115","https://openalex.org/W2177317049","https://openalex.org/W2594183968","https://openalex.org/W2963341956","https://openalex.org/W2963469388","https://openalex.org/W2969656782","https://openalex.org/W2998702515","https://openalex.org/W3034999214","https://openalex.org/W3082081167","https://openalex.org/W3094640617","https://openalex.org/W3211951295","https://openalex.org/W4220902634","https://openalex.org/W4226159083","https://openalex.org/W4306179830","https://openalex.org/W4307468223","https://openalex.org/W4312632987","https://openalex.org/W4385572894","https://openalex.org/W4386566826","https://openalex.org/W4396721893","https://openalex.org/W4404780835","https://openalex.org/W6977367121"],"related_works":[],"abstract_inverted_index":{"The":[0,79,92],"recent":[1],"integration":[2],"of":[3,29,52,56,73,76,102,110,146],"natural":[4],"language":[5,16],"processing":[6],"into":[7],"chemistry":[8,58,74],"has":[9],"advanced":[10],"drug":[11],"discovery.":[12],"Molecule":[13],"representations":[14,47,101],"in":[15,45],"models":[17,30,130],"(LMs)":[18],"are":[19,119],"crucial":[20],"to":[21,31,123],"enhance":[22],"chemical":[23,35,90],"understanding.":[24],"We":[25,60,127,149],"explored":[26],"the":[27,33,42,54,71,96,99,103,116,129,135,152,161],"ability":[28],"match":[32],"same":[34,43],"structures":[36],"despite":[37],"their":[38],"different":[39,46,77,124],"representations.":[40,126],"Recognizing":[41],"substance":[44],"is":[48,81],"an":[49],"important":[50],"component":[51],"emulating":[53],"understanding":[55],"how":[57],"works.":[59],"propose":[61],"Augmented":[62],"Molecular":[63],"Retrieval":[64],"(AMORE),":[65],"a":[66,88],"flexible":[67],"zero-shot":[68],"framework":[69,80],"for":[70],"assessment":[72],"LMs":[75],"natures.":[78],"based":[82],"on":[83,131,138],"SMILES":[84,106,125,156],"augmentations":[85],"that":[86,109,115,151],"maintain":[87],"foundational":[89],"structure.":[91],"proposed":[93,162],"method":[94],"facilitates":[95],"similarity":[97],"between":[98],"embedding":[100],"molecule,":[104],"its":[105],"variation,":[107],"and":[108,141,143],"another":[111],"molecule.":[112],"Experiments":[113],"indicate":[114],"tested":[117],"ChemLLMs":[118],"still":[120],"not":[121],"robust":[122],"evaluated":[128],"various":[132],"tasks,":[133],"including":[134],"molecular":[136],"captioning":[137],"ChEBI-20":[139],"benchmark":[140],"classification":[142],"regression":[144],"tasks":[145],"MoleculeNet":[147],"benchmark.":[148],"show":[150],"results'":[153],"change":[154],"after":[155],"strings":[157],"variations":[158],"align":[159],"with":[160],"AMORE":[163],"framework.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-30T00:00:00"}
