{"id":"https://openalex.org/W7117108912","doi":"https://doi.org/10.48550/arxiv.2512.18531","title":"Pushing the limits of one-dimensional NMR spectroscopy for automated structure elucidation using artificial intelligence","display_name":"Pushing the limits of one-dimensional NMR spectroscopy for automated structure elucidation using artificial intelligence","publication_year":2025,"publication_date":"2025-12-20","ids":{"openalex":"https://openalex.org/W7117108912","doi":"https://doi.org/10.48550/arxiv.2512.18531"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.18531","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.18531","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.18531","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101784501","display_name":"Frank Hu","orcid":"https://orcid.org/0009-0001-4783-0947"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hu, Frank","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108364732","display_name":"J. A. Tubb","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tubb, Jonathan M.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021057857","display_name":"Dimitris S. Argyropoulos","orcid":"https://orcid.org/0000-0003-1187-7919"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Argyropoulos, Dimitris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044025113","display_name":"S. S. Golotvin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Golotvin, Sergey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085705035","display_name":"Mikhail Elyashberg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elyashberg, Mikhail","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071403558","display_name":"Grant M. Rotskoff","orcid":"https://orcid.org/0000-0002-7772-5179"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rotskoff, Grant M.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025223034","display_name":"Matthew W. Kanan","orcid":"https://orcid.org/0000-0002-5932-6289"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kanan, Matthew W.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5069747836","display_name":"Thomas E. Markland","orcid":"https://orcid.org/0000-0002-2747-0518"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Markland, Thomas E.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101784501"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.8575000166893005,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.8575000166893005,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.04580000042915344,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12748","display_name":"Molecular spectroscopy and chirality","score":0.04230000078678131,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nuclear-magnetic-resonance-spectroscopy","display_name":"Nuclear magnetic resonance spectroscopy","score":0.6029999852180481},{"id":"https://openalex.org/keywords/characterization","display_name":"Characterization (materials science)","score":0.552299976348877},{"id":"https://openalex.org/keywords/molecule","display_name":"Molecule","score":0.5460000038146973},{"id":"https://openalex.org/keywords/chemical-space","display_name":"Chemical space","score":0.532800018787384},{"id":"https://openalex.org/keywords/spectroscopy","display_name":"Spectroscopy","score":0.4918999969959259},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.46320000290870667},{"id":"https://openalex.org/keywords/organic-molecules","display_name":"Organic molecules","score":0.4406000077724457}],"concepts":[{"id":"https://openalex.org/C66974803","wikidata":"https://www.wikidata.org/wiki/Q10359898","display_name":"Nuclear magnetic resonance spectroscopy","level":2,"score":0.6029999852180481},{"id":"https://openalex.org/C2780841128","wikidata":"https://www.wikidata.org/wiki/Q5073781","display_name":"Characterization (materials science)","level":2,"score":0.552299976348877},{"id":"https://openalex.org/C32909587","wikidata":"https://www.wikidata.org/wiki/Q11369","display_name":"Molecule","level":2,"score":0.5460000038146973},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.532800018787384},{"id":"https://openalex.org/C32891209","wikidata":"https://www.wikidata.org/wiki/Q483666","display_name":"Spectroscopy","level":2,"score":0.4918999969959259},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.46320000290870667},{"id":"https://openalex.org/C2984497647","wikidata":"https://www.wikidata.org/wiki/Q174211","display_name":"Organic molecules","level":3,"score":0.4406000077724457},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.39070001244544983},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3580000102519989},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.34040001034736633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33379998803138733},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.3221000134944916},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3149999976158142},{"id":"https://openalex.org/C171250308","wikidata":"https://www.wikidata.org/wiki/Q11468","display_name":"Nanotechnology","level":1,"score":0.30320000648498535},{"id":"https://openalex.org/C67020732","wikidata":"https://www.wikidata.org/wiki/Q7068131","display_name":"Nuclear magnetic resonance crystallography","level":4,"score":0.289000004529953},{"id":"https://openalex.org/C163111631","wikidata":"https://www.wikidata.org/wiki/Q1952454","display_name":"Carbon-13 NMR","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C19549132","wikidata":"https://www.wikidata.org/wiki/Q11141463","display_name":"Two-dimensional nuclear magnetic resonance spectroscopy","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C111429119","wikidata":"https://www.wikidata.org/wiki/Q899127","display_name":"Chemical shift","level":2,"score":0.27489998936653137}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.18531","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.18531","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.18531","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.18531","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6154363751411438,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"One-dimensional":[0],"NMR":[1,64,160],"spectroscopy":[2],"is":[3,80],"one":[4],"of":[5,14,30,44,51,54,125,167],"the":[6,12,28,46,95,113,126,143,150,156,164,168],"most":[7],"widely":[8],"used":[9],"techniques":[10],"for":[11,86],"characterization":[13],"organic":[15,102],"compounds":[16],"and":[17,49,112,158],"natural":[18,133],"products.":[19],"For":[20],"molecules":[21],"with":[22,88,146],"up":[23,89],"to":[24,36,82,90,175],"36":[25],"non-hydrogen":[26,92],"atoms,":[27],"number":[29],"possible":[31,81],"structures":[32],"has":[33],"been":[34],"estimated":[35],"range":[37],"from":[38,132],"$10^{20}":[39],"-":[40],"10^{60}$.":[41],"The":[42],"task":[43,85],"determining":[45],"structure":[47,69],"(formula":[48],"connectivity)":[50],"a":[52,116,122],"molecule":[53,145],"this":[55,84],"size":[56],"using":[57,115,154],"only":[58,155],"its":[59],"one-dimensional":[60],"$^1$H":[61,157],"and/or":[62],"$^{13}$C":[63,159],"spectrum,":[65],"i.e.":[66],"de":[67],"novo":[68],"generation,":[70],"thus":[71,120,162],"appears":[72],"completely":[73],"intractable.":[74],"Here":[75],"we":[76,136],"show":[77,137],"how":[78],"it":[79],"achieve":[83],"systems":[87],"40":[91],"atoms":[93],"across":[94],"full":[96],"elemental":[97],"coverage":[98],"typically":[99],"encountered":[100],"in":[101],"chemistry":[103],"(C,":[104],"N,":[105],"O,":[106],"H,":[107],"P,":[108],"S,":[109],"Si,":[110],"B,":[111],"halogens)":[114],"deep":[117],"learning":[118],"framework,":[119],"covering":[121],"vast":[123],"portion":[124],"drug-like":[127],"chemical":[128,169],"space.":[129],"Leveraging":[130],"insights":[131],"language":[134],"processing,":[135],"that":[138],"our":[139],"transformer-based":[140],"architecture":[141],"predicts":[142],"correct":[144],"55.2%":[147],"accuracy":[148],"within":[149],"first":[151],"15":[152],"predictions":[153],"spectra,":[161],"overcoming":[163],"combinatorial":[165],"growth":[166],"space":[170],"while":[171],"also":[172],"being":[173],"extensible":[174],"experimental":[176],"data":[177],"via":[178],"fine-tuning.":[179]},"counts_by_year":[],"updated_date":"2025-12-24T23:14:05.333182","created_date":"2025-12-24T00:00:00"}
