{"id":"https://openalex.org/W4414941879","doi":"https://doi.org/10.1021/acs.jcim.5c00464","title":"Upgrading Reliability in Molecular Property Prediction by Robust Quantification of Uncertainty from Machine Learning Models","display_name":"Upgrading Reliability in Molecular Property Prediction by Robust Quantification of Uncertainty from Machine Learning Models","publication_year":2025,"publication_date":"2025-10-08","ids":{"openalex":"https://openalex.org/W4414941879","doi":"https://doi.org/10.1021/acs.jcim.5c00464","pmid":"https://pubmed.ncbi.nlm.nih.gov/41060699"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.5c00464","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c00464","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092566343","display_name":"Alexander K\u00f6tter","orcid":"https://orcid.org/0000-0003-2977-6729"},"institutions":[{"id":"https://openalex.org/I4210110242","display_name":"Digital Science (United States)","ror":"https://ror.org/020h4b682","country_code":"US","type":"company","lineage":["https://openalex.org/I4210110242","https://openalex.org/I4210112888","https://openalex.org/I4210118830"]},{"id":"https://openalex.org/I4210138838","display_name":"Sanofi (Germany)","ror":"https://ror.org/03ytdtb31","country_code":"DE","type":"company","lineage":["https://openalex.org/I3018075036","https://openalex.org/I4210138838"]}],"countries":["DE","US"],"is_corresponding":true,"raw_author_name":"Alex K\u00f6tter","raw_affiliation_strings":["Digital R&D Large Molecule Research","Sanofi-Aventis Deutschland GmbH"],"raw_orcid":"https://orcid.org/0000-0003-2977-6729","affiliations":[{"raw_affiliation_string":"Digital R&D Large Molecule Research","institution_ids":["https://openalex.org/I4210110242"]},{"raw_affiliation_string":"Sanofi-Aventis Deutschland GmbH","institution_ids":["https://openalex.org/I4210138838"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063848138","display_name":"Kanishka Singh","orcid":"https://orcid.org/0000-0003-2700-9307"},"institutions":[{"id":"https://openalex.org/I4210138838","display_name":"Sanofi (Germany)","ror":"https://ror.org/03ytdtb31","country_code":"DE","type":"company","lineage":["https://openalex.org/I3018075036","https://openalex.org/I4210138838"]},{"id":"https://openalex.org/I4210146958","display_name":"Synthetic Biologics (United States)","ror":"https://ror.org/04trz4q34","country_code":"US","type":"company","lineage":["https://openalex.org/I4210146958"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Kanishka Singh","raw_affiliation_strings":["Sanofi-Aventis Deutschland GmbH","Synthetic Molecular Design, Integrated Drug Discovery"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sanofi-Aventis Deutschland GmbH","institution_ids":["https://openalex.org/I4210138838"]},{"raw_affiliation_string":"Synthetic Molecular Design, Integrated Drug Discovery","institution_ids":["https://openalex.org/I4210146958"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007864658","display_name":"Hans Matter","orcid":null},"institutions":[{"id":"https://openalex.org/I4210138838","display_name":"Sanofi (Germany)","ror":"https://ror.org/03ytdtb31","country_code":"DE","type":"company","lineage":["https://openalex.org/I3018075036","https://openalex.org/I4210138838"]},{"id":"https://openalex.org/I4210146958","display_name":"Synthetic Biologics (United States)","ror":"https://ror.org/04trz4q34","country_code":"US","type":"company","lineage":["https://openalex.org/I4210146958"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Hans Matter","raw_affiliation_strings":["Sanofi-Aventis Deutschland GmbH","Synthetic Molecular Design, Integrated Drug Discovery"],"raw_orcid":"https://orcid.org/0000-0002-0249-6025","affiliations":[{"raw_affiliation_string":"Sanofi-Aventis Deutschland GmbH","institution_ids":["https://openalex.org/I4210138838"]},{"raw_affiliation_string":"Synthetic Molecular Design, Integrated Drug Discovery","institution_ids":["https://openalex.org/I4210146958"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061787871","display_name":"Gerhard He\u00dfler","orcid":"https://orcid.org/0000-0001-5602-0965"},"institutions":[{"id":"https://openalex.org/I4210138838","display_name":"Sanofi (Germany)","ror":"https://ror.org/03ytdtb31","country_code":"DE","type":"company","lineage":["https://openalex.org/I3018075036","https://openalex.org/I4210138838"]},{"id":"https://openalex.org/I4210146958","display_name":"Synthetic Biologics (United States)","ror":"https://ror.org/04trz4q34","country_code":"US","type":"company","lineage":["https://openalex.org/I4210146958"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Gerhard Hessler","raw_affiliation_strings":["Sanofi-Aventis Deutschland GmbH","Synthetic Molecular Design, Integrated Drug Discovery"],"raw_orcid":"https://orcid.org/0000-0001-5602-0965","affiliations":[{"raw_affiliation_string":"Sanofi-Aventis Deutschland GmbH","institution_ids":["https://openalex.org/I4210138838"]},{"raw_affiliation_string":"Synthetic Molecular Design, Integrated Drug Discovery","institution_ids":["https://openalex.org/I4210146958"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042738116","display_name":"Christoph Grebner","orcid":"https://orcid.org/0000-0001-5301-1078"},"institutions":[{"id":"https://openalex.org/I4210138838","display_name":"Sanofi (Germany)","ror":"https://ror.org/03ytdtb31","country_code":"DE","type":"company","lineage":["https://openalex.org/I3018075036","https://openalex.org/I4210138838"]},{"id":"https://openalex.org/I4210146958","display_name":"Synthetic Biologics (United States)","ror":"https://ror.org/04trz4q34","country_code":"US","type":"company","lineage":["https://openalex.org/I4210146958"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Christoph Grebner","raw_affiliation_strings":["Sanofi-Aventis Deutschland GmbH","Synthetic Molecular Design, Integrated Drug Discovery"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sanofi-Aventis Deutschland GmbH","institution_ids":["https://openalex.org/I4210138838"]},{"raw_affiliation_string":"Synthetic Molecular Design, Integrated Drug Discovery","institution_ids":["https://openalex.org/I4210146958"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5092566343"],"corresponding_institution_ids":["https://openalex.org/I4210110242","https://openalex.org/I4210138838"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28237094,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"65","issue":"20","first_page":"10819","last_page":"10831"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9509999752044678,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.7218000292778015},{"id":"https://openalex.org/keywords/uncertainty-quantification","display_name":"Uncertainty quantification","score":0.6883000135421753},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.6485999822616577},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5687999725341797},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.532800018787384},{"id":"https://openalex.org/keywords/mean-squared-prediction-error","display_name":"Mean squared prediction error","score":0.39329999685287476},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.38449999690055847},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.3792000114917755}],"concepts":[{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7491000294685364},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.7218000292778015},{"id":"https://openalex.org/C32230216","wikidata":"https://www.wikidata.org/wiki/Q7882499","display_name":"Uncertainty quantification","level":2,"score":0.6883000135421753},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6797999739646912},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6563000082969666},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.6485999822616577},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5687999725341797},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.532800018787384},{"id":"https://openalex.org/C167085575","wikidata":"https://www.wikidata.org/wiki/Q6803654","display_name":"Mean squared prediction error","level":2,"score":0.39329999685287476},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.38449999690055847},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3799000084400177},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3792000114917755},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.3718000054359436},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.3434000015258789},{"id":"https://openalex.org/C164923092","wikidata":"https://www.wikidata.org/wiki/Q3705921","display_name":"Molecular descriptor","level":3,"score":0.3199999928474426},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.314300000667572},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C137209882","wikidata":"https://www.wikidata.org/wiki/Q1403517","display_name":"Measurement uncertainty","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C123614077","wikidata":"https://www.wikidata.org/wiki/Q1364905","display_name":"Propagation of uncertainty","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26589998602867126},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.25279998779296875},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.25110000371932983},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2508000135421753}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D013329","descriptor_name":"Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013329","descriptor_name":"Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015203","descriptor_name":"Reproducibility of Results","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015203","descriptor_name":"Reproducibility of Results","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D035501","descriptor_name":"Uncertainty","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D035501","descriptor_name":"Uncertainty","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1021/acs.jcim.5c00464","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c00464","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:41060699","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41060699","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1500036797","https://openalex.org/W1560021816","https://openalex.org/W1567512734","https://openalex.org/W1988037271","https://openalex.org/W1988195734","https://openalex.org/W1991238353","https://openalex.org/W1994249991","https://openalex.org/W2007344527","https://openalex.org/W2034541070","https://openalex.org/W2039609876","https://openalex.org/W2042278642","https://openalex.org/W2043990744","https://openalex.org/W2048611698","https://openalex.org/W2060531713","https://openalex.org/W2079699273","https://openalex.org/W2085890279","https://openalex.org/W2165618135","https://openalex.org/W2329862012","https://openalex.org/W2407586185","https://openalex.org/W2900415334","https://openalex.org/W2911789160","https://openalex.org/W2947216528","https://openalex.org/W2997287432","https://openalex.org/W3012035124","https://openalex.org/W3012320417","https://openalex.org/W3012585755","https://openalex.org/W3014339631","https://openalex.org/W3014596384","https://openalex.org/W3044724994","https://openalex.org/W3107587236","https://openalex.org/W3112474878","https://openalex.org/W3113447514","https://openalex.org/W3134774296","https://openalex.org/W3159598019","https://openalex.org/W3170966572","https://openalex.org/W3185456481","https://openalex.org/W3199470976","https://openalex.org/W3202227570","https://openalex.org/W4214910967","https://openalex.org/W4284708848","https://openalex.org/W4286508612","https://openalex.org/W4292828881","https://openalex.org/W4296035488","https://openalex.org/W4310603653","https://openalex.org/W4311436943","https://openalex.org/W4319074048","https://openalex.org/W4381308335","https://openalex.org/W4385379503","https://openalex.org/W4386065840","https://openalex.org/W4386438166","https://openalex.org/W4387617009","https://openalex.org/W4388488709","https://openalex.org/W4392093325","https://openalex.org/W4402926983","https://openalex.org/W4404355135","https://openalex.org/W6931589241"],"related_works":[],"abstract_inverted_index":{"Reliable":[0],"methods":[1,92,103],"to":[2,43,105],"quantify":[3],"the":[4,15,47,71,77,84,121],"predictive":[5,35,85],"uncertainty":[6,86,89],"of":[7,17,37,46,65,67,87,112],"machine":[8],"learning":[9,29,174],"(ML)":[10],"models":[11,39],"can":[12],"significantly":[13,134],"increase":[14],"impact":[16],"molecular":[18,94],"property":[19,32,55],"prediction":[20],"and":[21,30,61,83,131,149,166],"are":[22,51],"routinely":[23],"used":[24],"in":[25,70,110,162,170],"applications":[26],"like":[27],"active":[28,173],"ML-guided":[31],"optimization.":[33],"Poor":[34],"accuracy":[36],"ML":[38],"is":[40],"often":[41],"related":[42],"(i)":[44],"regions":[45,111],"chemical":[48],"space,":[49],"which":[50],"characterized":[52],"by":[53,126],"large":[54],"differences":[56],"for":[57,153],"structurally":[58],"similar":[59],"molecules,":[60],"(ii)":[62],"a":[63,145],"lack":[64],"representation":[66],"test":[68,132],"molecules":[69],"training":[72,130],"data.":[73],"Here,":[74],"we":[75,143],"analyze":[76],"relationship":[78],"between":[79],"these":[80],"error":[81],"sources":[82],"popular":[88],"quantification":[90],"(UQ)":[91],"on":[93,140],"activity":[95],"data":[96,127],"sets.":[97],"We":[98,117],"find":[99],"that":[100,120,155],"several":[101,163],"UQ":[102,137,154],"struggle":[104],"identify":[106],"poorly":[107],"predicted":[108],"compounds":[109],"steep":[113],"structure-activity":[114],"relationships":[115],"(SAR).":[116],"also":[118],"demonstrate":[119,167],"evaluation":[122,164],"scenario,":[123],"as":[124],"defined":[125],"splitting":[128],"into":[129],"sets,":[133],"impacts":[135],"observed":[136],"performance.":[138],"Based":[139],"our":[141],"findings":[142],"introduce":[144],"simple":[146],"but":[147],"strong":[148],"very":[150],"robust":[151],"method":[152],"offers":[156],"significant":[157],"improvements":[158],"over":[159],"previous":[160],"approaches":[161],"scenarios":[165],"its":[168],"usefulness":[169],"an":[171],"exploratory":[172],"setting.":[175]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
