{"id":"https://openalex.org/W7161683854","doi":"https://doi.org/10.48550/arxiv.2605.16480","title":"MoleCode unlocks structural intelligence in large language models","display_name":"MoleCode unlocks structural intelligence in large language models","publication_year":2026,"publication_date":"2026-05-15","ids":{"openalex":"https://openalex.org/W7161683854","doi":"https://doi.org/10.48550/arxiv.2605.16480"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.16480","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16480","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.16480","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136482063","display_name":"Zhiyuan Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Zhiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136476721","display_name":"Chen Liu","orcid":"https://orcid.org/0000-0002-5693-8634"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124759765","display_name":"Boxuan Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Boxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136491615","display_name":"Kaiqing Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Kaiqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129477547","display_name":"Jixiang Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Jixiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136486687","display_name":"Yimi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yimi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005977834","display_name":"Liuzhenghao Lv","orcid":"https://orcid.org/0000-0001-5604-1678"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Liuzhenghao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136493490","display_name":"Hao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025026473","display_name":"Shanzhuo Zhang","orcid":"https://orcid.org/0000-0002-0098-8536"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shanzhuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136489061","display_name":"Li Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136459932","display_name":"Fanyang Mo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mo, Fanyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.932699978351593,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.932699978351593,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.010900000110268593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.010700000450015068,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.5741000175476074},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5284000039100647},{"id":"https://openalex.org/keywords/forcing","display_name":"Forcing (mathematics)","score":0.5278000235557556},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4163999855518341},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.3718000054359436},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.36970001459121704},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3644999861717224},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.36039999127388},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.35749998688697815}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6283000111579895},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.5741000175476074},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5284000039100647},{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.5278000235557556},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.45969998836517334},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44589999318122864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4278999865055084},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4163999855518341},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.3718000054359436},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.36970001459121704},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3644999861717224},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.36039999127388},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.34220001101493835},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.33000001311302185},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3165000081062317},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.28290000557899475},{"id":"https://openalex.org/C57098296","wikidata":"https://www.wikidata.org/wiki/Q12021746","display_name":"Interrogative","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2648000121116638},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2639999985694885},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.25440001487731934},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2524999976158142},{"id":"https://openalex.org/C2992562121","wikidata":"https://www.wikidata.org/wiki/Q3817808","display_name":"Scientific reasoning","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.16480","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16480","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.16480","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16480","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6909785270690918,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Molecules":[0],"are":[1,7,65],"graphs,":[2],"but":[3],"large":[4],"language":[5,59,87],"models~(LLMs)":[6],"usually":[8],"asked":[9],"to":[10,40,92,144,171,183,226],"reason":[11],"about":[12],"them":[13],"through":[14],"linear":[15],"strings.":[16],"The":[17,175],"most":[18,116],"popular":[19],"molecular":[20,42,58,63,78,103,160],"representation,":[21],"SMILES,":[22],"compresses":[23],"atoms,":[24],"bonds,":[25],"branches":[26],"and":[27,73,83,107,129,157,189,196,207,218],"rings":[28],"into":[29],"a":[30],"compact":[31],"sequence":[32],"in":[33,60,199],"which":[34,61,200],"topology":[35,79],"is":[36,121,137,203,236],"implicit,":[37],"forcing":[38],"LLMs":[39,115,219],"reconstruct":[41],"structure":[43,95,223,239],"before":[44],"performing":[45],"the":[46,86,172,213,232,238,245],"requested":[47],"chemical":[48,201],"operation.":[49],"Here":[50],"we":[51],"introduce":[52],"MoleCode,":[53],"an":[54,90],"LLM-native,":[55],"training-free,":[56],"graph-explicit":[57],"all":[62],"components":[64],"represented":[66],"as":[67,224],"typed":[68],"entities":[69],"with":[70,148],"persistent":[71],"identifiers":[72],"explicit":[74,155],"relations.":[75],"MoleCode":[76],"makes":[77],"directly":[80],"readable,":[81],"editable":[82],"auditable":[84],"within":[85],"context,":[88],"allowing":[89],"LLM":[91],"operate":[93],"on":[94],"rather":[96],"than":[97],"recover":[98],"it":[99],"from":[100,229],"syntax.":[101],"Across":[102],"reasoning,":[104],"editing,":[105],"generation":[106],"analysis":[108],"tasks,":[109],"this":[110,162],"representational":[111],"shift":[112],"improves":[113],"frontier":[114],"strongly":[117],"when":[118],"structural":[119,146,169],"access":[120],"limiting:":[122],"unfamiliar":[123],"molecules,":[124],"topology-sensitive":[125],"operations,":[126],"larger":[127],"structures":[128],"repetitive":[130],"polymers.":[131],"It":[132],"also":[133],"changes":[134],"how":[135],"inference":[136],"allocated,":[138],"replacing":[139],"long":[140],"reasoning":[141,153,235],"traces":[142],"devoted":[143],"implicit":[145],"reconstruction":[147],"shorter,":[149],"more":[150],"chemically":[151],"directed":[152],"over":[154],"atoms":[156],"bonds.":[158],"In":[159],"optimization,":[161],"enables":[163],"localized,":[164],"property-aligned":[165],"edits":[166],"that":[167,212],"preserve":[168],"similarity":[170],"starting":[173],"compounds.":[174],"same":[176],"Subgraph--Node--Edge":[177],"grammar":[178],"extends":[179],"beyond":[180],"small":[181],"molecules":[182],"polymers,":[184],"Markush":[185],"structures,":[186],"mechanism-style":[187],"transformations":[188],"interleaved":[190],"scientific":[191,216],"documents,":[192],"including":[193],"research":[194],"articles":[195],"patent":[197],"disclosures":[198],"information":[202],"distributed":[204],"across":[205],"text":[206],"images.":[208],"These":[209],"results":[210],"suggest":[211],"interface":[214],"between":[215],"objects":[217],"should":[220,241],"not":[221],"treat":[222],"something":[225],"be":[227,242],"decoded":[228],"text.":[230],"When":[231],"object":[233],"of":[234,244],"relational,":[237],"itself":[240],"part":[243],"language.":[246]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
