{"id":"https://openalex.org/W7141538142","doi":"https://doi.org/10.1109/access.2026.3678512","title":"Thinking Like a CHEMIST: Combined Heterogeneous Embedding Model Integrating Structure and Tokens","display_name":"Thinking Like a CHEMIST: Combined Heterogeneous Embedding Model Integrating Structure and Tokens","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7141538142","doi":"https://doi.org/10.1109/access.2026.3678512"},"language":null,"primary_location":{"id":"doi:10.1109/access.2026.3678512","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3678512","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3678512","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130725383","display_name":"Nikolai A. Rekut","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098440","display_name":"Ministry of Health","ror":"https://ror.org/00y6q9n79","country_code":"ES","type":"government","lineage":["https://openalex.org/I4210098440"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Nikolai A. Rekut","raw_affiliation_strings":["N.N. Blokhin National Medical Research Center of Oncology (N.N. Blokhin NMRCO), Ministry of Health of the Russian Federation, Moscow, Russia"],"raw_orcid":"https://orcid.org/0009-0006-6193-1863","affiliations":[{"raw_affiliation_string":"N.N. Blokhin National Medical Research Center of Oncology (N.N. Blokhin NMRCO), Ministry of Health of the Russian Federation, Moscow, Russia","institution_ids":["https://openalex.org/I4210098440"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130720937","display_name":"Alexey A. Orlov","orcid":null},"institutions":[{"id":"https://openalex.org/I4392021246","display_name":"AIRI - Artificial Intelligence Research Institute","ror":"https://ror.org/014a87f14","country_code":"RU","type":"nonprofit","lineage":["https://openalex.org/I4392021246"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Alexey A. Orlov","raw_affiliation_strings":["Basic Research of Artificial Intelligence Laboratory (BRAIn Lab), Moscow Independent Research Institute of Artificial Intelligence, Moscow, Russia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Basic Research of Artificial Intelligence Laboratory (BRAIn Lab), Moscow Independent Research Institute of Artificial Intelligence, Moscow, Russia","institution_ids":["https://openalex.org/I4392021246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047617995","display_name":"Klea Ziu","orcid":"https://orcid.org/0000-0001-5809-6816"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Klea Ziu","raw_affiliation_strings":["Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates"],"raw_orcid":"https://orcid.org/0000-0001-5809-6816","affiliations":[{"raw_affiliation_string":"Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130759359","display_name":"Elizaveta Starykh","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Elizaveta Starykh","raw_affiliation_strings":["Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates"],"raw_orcid":"https://orcid.org/0000-0002-0738-9835","affiliations":[{"raw_affiliation_string":"Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130736396","display_name":"Martin Tak\u00e1\u010d","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Martin Tak\u00e1\u010d","raw_affiliation_strings":["Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088060268","display_name":"Aleksandr Beznosikov","orcid":"https://orcid.org/0000-0002-3217-3614"},"institutions":[{"id":"https://openalex.org/I4392021246","display_name":"AIRI - Artificial Intelligence Research Institute","ror":"https://ror.org/014a87f14","country_code":"RU","type":"nonprofit","lineage":["https://openalex.org/I4392021246"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Aleksandr N. Beznosikov","raw_affiliation_strings":["Basic Research of Artificial Intelligence Laboratory (BRAIn Lab), Moscow Independent Research Institute of Artificial Intelligence, Moscow, Russia"],"raw_orcid":"https://orcid.org/0000-0002-3217-3614","affiliations":[{"raw_affiliation_string":"Basic Research of Artificial Intelligence Laboratory (BRAIn Lab), Moscow Independent Research Institute of Artificial Intelligence, Moscow, Russia","institution_ids":["https://openalex.org/I4392021246"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.49643597,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"53145","last_page":"53163"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11445","display_name":"Origins and Evolution of Life","score":0.08299999684095383,"subfield":{"id":"https://openalex.org/subfields/3103","display_name":"Astronomy and Astrophysics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11445","display_name":"Origins and Evolution of Life","score":0.08299999684095383,"subfield":{"id":"https://openalex.org/subfields/3103","display_name":"Astronomy and Astrophysics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10778","display_name":"Philosophy and History of Science","score":0.03449999913573265,"subfield":{"id":"https://openalex.org/subfields/1207","display_name":"History and Philosophy of Science"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10072","display_name":"Science Education and Pedagogy","score":0.027400000020861626,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6011999845504761},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3481000065803528},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.313400000333786},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.31029999256134033}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8353999853134155},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6011999845504761},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.498199999332428},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40450000762939453},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3481000065803528},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.313400000333786},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.31029999256134033},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2667999863624573},{"id":"https://openalex.org/C3020028006","wikidata":"https://www.wikidata.org/wiki/Q9158","display_name":"Electronic mail","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/access.2026.3678512","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3678512","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3678512","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3678512","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4904107451438904,"display_name":"Zero hunger","id":"https://metadata.un.org/sdg/2"}],"awards":[{"id":"https://openalex.org/G7640686651","display_name":null,"funder_award_id":"139-15-2025-008","funder_id":"https://openalex.org/F4320329807","funder_display_name":"Ministry of Health of the Russian Federation"}],"funders":[{"id":"https://openalex.org/F4320329807","display_name":"Ministry of Health of the Russian Federation","ror":"https://ror.org/01p8ehb87"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Representing":[0],"molecular":[1],"structures":[2],"effectively":[3],"in":[4,39,144],"chemistry":[5],"remains":[6],"a":[7,58,67,107],"challenging":[8],"task.":[9],"Language":[10],"models":[11,14,52],"and":[12,49,75,85,96,104,131],"graph-based":[13,116],"are":[15],"extensively":[16],"utilized":[17],"within":[18],"this":[19,63,94,112],"domain,":[20],"consistently":[21],"achieving":[22],"state-of-the-":[23],"art":[24],"results":[25],"across":[26],"an":[27],"array":[28],"of":[29,35],"tasks.":[30],"However,":[31],"the":[32,40],"prevailing":[33],"practice":[34],"representing":[36],"chemical":[37],"compounds":[38],"SMILES":[41],"format":[42],"\u2013":[43,53],"used":[44],"by":[45],"most":[46],"data":[47,60,98],"sets":[48],"many":[50],"language":[51,102,113],"presents":[54],"notable":[55,139],"limitations":[56],"as":[57,99,133,148],"training":[59],"format.":[61],"In":[62],"study,":[64],"we":[65,120],"present":[66],"novel":[68],"approach":[69],"that":[70,110],"decomposes":[71],"molecules":[72],"into":[73],"substructures":[74],"computes":[76],"descriptor-based":[77],"representations":[78],"for":[79,89,101],"these":[80],"fragments,":[81],"providing":[82],"more":[83],"detailed":[84],"chemically":[86],"relevant":[87],"input":[88,100],"model":[90,103,114],"training.":[91],"We":[92],"use":[93,121],"substructure":[95],"descriptor":[97],"also":[105],"propose":[106],"bimodal":[108],"architecture":[109],"integrates":[111],"with":[115],"models.":[117],"As":[118],"LM":[119],"RoBERTa,":[122],"Graph":[123,127],"Isomorphism":[124],"Networks":[125,129],"(GIN),":[126],"Convolutional":[128],"(GCN)":[130],"Graphormer":[132],"graph":[134],"ones.":[135],"Our":[136],"framework":[137],"shows":[138],"improvements":[140],"over":[141],"traditional":[142],"methods":[143],"various":[145],"tasks":[146],"such":[147],"Quantitative":[149],"Structure-Activity":[150],"Relationship":[151],"(QSAR)":[152],"prediction.":[153]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-28T00:00:00"}
