{"id":"https://openalex.org/W4417429827","doi":"https://doi.org/10.1021/acs.jcim.5c01834","title":"Toward Complete Molecular Structure Prediction from Infrared Spectroscopy Using Deep Learning","display_name":"Toward Complete Molecular Structure Prediction from Infrared Spectroscopy Using Deep Learning","publication_year":2025,"publication_date":"2025-12-17","ids":{"openalex":"https://openalex.org/W4417429827","doi":"https://doi.org/10.1021/acs.jcim.5c01834","pmid":"https://pubmed.ncbi.nlm.nih.gov/41403343"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.5c01834","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c01834","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039034006","display_name":"Colin Zhang","orcid":"https://orcid.org/0009-0009-2411-9640"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I4210150971","display_name":"Institute of Molecular Biology and Biophysics","ror":"https://ror.org/051f11n17","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210110862","https://openalex.org/I4210150971"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["RU","US"],"is_corresponding":false,"raw_author_name":"Colin Zhang","raw_affiliation_strings":["Berkeley Center for Structural Biology, Molecular Biophysics and Integrated Bioimaging","Lawrence Berkeley National Laboratory","Massachusetts Institute of Technology","Massachusetts Institute of Technology, Cambridge, Massachusetts 02139, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley Center for Structural Biology, Molecular Biophysics and Integrated Bioimaging","institution_ids":["https://openalex.org/I4210150971"]},{"raw_affiliation_string":"Lawrence Berkeley National Laboratory","institution_ids":["https://openalex.org/I148283060"]},{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, Massachusetts 02139, United States","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066777165","display_name":"Yang Ha","orcid":"https://orcid.org/0000-0001-5684-8420"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I4210150971","display_name":"Institute of Molecular Biology and Biophysics","ror":"https://ror.org/051f11n17","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210110862","https://openalex.org/I4210150971"]}],"countries":["RU","US"],"is_corresponding":true,"raw_author_name":"Yang Ha","raw_affiliation_strings":["Berkeley Center for Structural Biology, Molecular Biophysics and Integrated Bioimaging","Lawrence Berkeley National Laboratory"],"raw_orcid":"https://orcid.org/0000-0001-5684-8420","affiliations":[{"raw_affiliation_string":"Berkeley Center for Structural Biology, Molecular Biophysics and Integrated Bioimaging","institution_ids":["https://openalex.org/I4210150971"]},{"raw_affiliation_string":"Lawrence Berkeley National Laboratory","institution_ids":["https://openalex.org/I148283060"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5066777165"],"corresponding_institution_ids":["https://openalex.org/I148283060","https://openalex.org/I4210150971"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30894583,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"66","issue":"1","first_page":"100","last_page":"109"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6937000155448914,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6937000155448914,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11324","display_name":"Spectroscopy Techniques in Biomedical and Chemical Research","score":0.07450000196695328,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.055799998342990875,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5774999856948853},{"id":"https://openalex.org/keywords/spectroscopy","display_name":"Spectroscopy","score":0.5667999982833862},{"id":"https://openalex.org/keywords/infrared-spectroscopy","display_name":"Infrared spectroscopy","score":0.5559999942779541},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.546999990940094},{"id":"https://openalex.org/keywords/spectral-line","display_name":"Spectral line","score":0.529699981212616},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.45339998602867126},{"id":"https://openalex.org/keywords/density-functional-theory","display_name":"Density functional theory","score":0.4465000033378601},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.429500013589859},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.390500009059906}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6292999982833862},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5936999917030334},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5774999856948853},{"id":"https://openalex.org/C32891209","wikidata":"https://www.wikidata.org/wiki/Q483666","display_name":"Spectroscopy","level":2,"score":0.5667999982833862},{"id":"https://openalex.org/C153642686","wikidata":"https://www.wikidata.org/wiki/Q70906","display_name":"Infrared spectroscopy","level":2,"score":0.5559999942779541},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.546999990940094},{"id":"https://openalex.org/C4839761","wikidata":"https://www.wikidata.org/wiki/Q212111","display_name":"Spectral line","level":2,"score":0.529699981212616},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.45339998602867126},{"id":"https://openalex.org/C152365726","wikidata":"https://www.wikidata.org/wiki/Q1048589","display_name":"Density functional theory","level":2,"score":0.4465000033378601},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.429500013589859},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.390500009059906},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.38929998874664307},{"id":"https://openalex.org/C39539562","wikidata":"https://www.wikidata.org/wiki/Q7858699","display_name":"Two-dimensional infrared spectroscopy","level":3,"score":0.38440001010894775},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.3707999885082245},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36820000410079956},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.36640000343322754},{"id":"https://openalex.org/C32909587","wikidata":"https://www.wikidata.org/wiki/Q11369","display_name":"Molecule","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.34880000352859497},{"id":"https://openalex.org/C158355884","wikidata":"https://www.wikidata.org/wiki/Q11388","display_name":"Infrared","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C135252773","wikidata":"https://www.wikidata.org/wiki/Q1567213","display_name":"Inverse problem","level":2,"score":0.32739999890327454},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32120001316070557},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.3018999993801117},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.2985999882221222},{"id":"https://openalex.org/C198352243","wikidata":"https://www.wikidata.org/wiki/Q37105","display_name":"Line (geometry)","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C147597530","wikidata":"https://www.wikidata.org/wiki/Q369472","display_name":"Computational chemistry","level":1,"score":0.2786000072956085},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.25459998846054077}],"mesh":[{"descriptor_ui":"D000077318","descriptor_name":"Density Functional Theory","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000077318","descriptor_name":"Density Functional Theory","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D013055","descriptor_name":"Spectrophotometry, Infrared","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013055","descriptor_name":"Spectrophotometry, Infrared","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015394","descriptor_name":"Molecular Structure","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015394","descriptor_name":"Molecular Structure","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1021/acs.jcim.5c01834","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c01834","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:41403343","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41403343","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1988091937","https://openalex.org/W2011301426","https://openalex.org/W2028046413","https://openalex.org/W2045794503","https://openalex.org/W2079199763","https://openalex.org/W2080447388","https://openalex.org/W2146266654","https://openalex.org/W2151697120","https://openalex.org/W2161782568","https://openalex.org/W2172216479","https://openalex.org/W2292646695","https://openalex.org/W2558999090","https://openalex.org/W2972308609","https://openalex.org/W3009321976","https://openalex.org/W3034310241","https://openalex.org/W3036061391","https://openalex.org/W3038823641","https://openalex.org/W3082300343","https://openalex.org/W3164194348","https://openalex.org/W3176750058","https://openalex.org/W3217248762","https://openalex.org/W4248107770","https://openalex.org/W4281688302","https://openalex.org/W4321603581","https://openalex.org/W4385476447","https://openalex.org/W4404435697"],"related_works":[],"abstract_inverted_index":{"Infrared":[0],"(IR)":[1],"spectroscopy":[2,243],"is":[3,25,38],"a":[4,105,121,154,180],"broadly":[5],"used":[6],"tool":[7],"to":[8,57,63,114,152,200,216,237],"solve":[9],"the":[10,17,28,65,76,86,95,116,137,141,175,206,231,239],"molecular":[11,33,118],"structures":[12,34],"of":[13,19,31,67,78,88,97,145,158,185,190,210,233,241],"unknown":[14],"compounds.":[15],"Though":[16],"theory":[18,169],"generating":[20],"IR":[21,132,162,242],"spectra":[22,37,48,68,163],"from":[23,35,130,221],"molecules":[24],"well":[26],"established,":[27],"inverse":[29],"problem":[30],"solving":[32],"given":[36],"still":[39],"challenging.":[40],"Because":[41],"complex":[42],"organic":[43],"compounds":[44],"often":[45],"produce":[46],"ambiguous":[47],"with":[49,198],"overlapping":[50],"peaks,":[51],"even":[52],"human":[53],"researchers":[54],"may":[55],"struggle":[56],"interpret":[58],"them":[59],"accurately.":[60],"Prior":[61],"attempts":[62],"automate":[64],"interpretation":[66],"using":[69,234],"machine":[70],"learning":[71,108,236],"have":[72],"been":[73],"limited":[74],"by":[75,111],"availability":[77],"high-quality":[79],"data":[80,156],"and":[81,225,248],"primarily":[82],"focused":[83],"on":[84,179],"predicting":[85,94],"presence":[87],"individual":[89,195],"functional":[90,168,196],"groups":[91,197],"rather":[92],"than":[93],"structure":[96,119,177],"entire":[98],"molecules.":[99],"In":[100],"this":[101],"work,":[102],"we":[103,139],"propose":[104],"dual-loss":[106],"deep":[107,235],"architecture,":[109],"inspired":[110],"image-captioning":[112],"models,":[113,138],"predict":[115],"complete":[117,176],"as":[120],"Simplified":[122],"Molecular":[123],"Input":[124],"Line":[125],"Entry":[126],"System":[127],"(SMILES)":[128],"string":[129],"only":[131],"spectral":[133,226],"absorbances.":[134],"To":[135],"train":[136],"leveraged":[140],"high-performance":[142],"computing":[143],"resources":[144],"Lawrence":[146],"Berkeley":[147],"National":[148],"Laboratory's":[149],"Lawrencium":[150],"cluster":[151],"generate":[153],"rich":[155],"set":[157,189],"over":[159],"17,000":[160],"unique":[161],"through":[164],"quantum":[165],"mechanical":[166],"density":[167],"calculations.":[170],"Our":[171],"best-performing":[172],"model":[173,212],"predicts":[174],"correctly":[178],"single":[181],"attempt":[182],"for":[183],"16.26%":[184],"an":[186],"unseen":[187],"test":[188],"1710":[191],"spectra,":[192],"while":[193],"regenerating":[194],"up":[199],"88%":[201],"accuracy.":[202],"We":[203],"show":[204],"that":[205],"dual":[207],"loss":[208],"function":[209],"our":[211],"architecture":[213],"enables":[214],"it":[215],"learn":[217],"important":[218],"chemical":[219],"properties":[220],"both":[222],"SMILES":[223],"strings":[224],"data.":[227],"This":[228],"research":[229],"suggests":[230],"potential":[232],"enhance":[238],"capabilities":[240],"in":[244],"analytical":[245],"chemistry,":[246],"medicine,":[247],"other":[249],"fields.":[250]},"counts_by_year":[],"updated_date":"2026-01-14T00:41:55.809242","created_date":"2025-12-17T00:00:00"}
