{"id":"https://openalex.org/W4412697141","doi":"https://doi.org/10.1021/acs.jcim.6c00218","title":"Relating Model Performance to Embedding Distributions in Molecular Machine Learning","display_name":"Relating Model Performance to Embedding Distributions in Molecular Machine Learning","publication_year":2025,"publication_date":"2025-07-28","ids":{"openalex":"https://openalex.org/W4412697141","doi":"https://doi.org/10.1021/acs.jcim.6c00218","pmid":"https://pubmed.ncbi.nlm.nih.gov/42018690"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.6c00218","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.6c00218","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.6c00218?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"preprint","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.6c00218?ref=article_openPDF","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023314919","display_name":"Matthias Welsch","orcid":"https://orcid.org/0009-0007-9443-895X"},"institutions":[{"id":"https://openalex.org/I129774422","display_name":"University of Vienna","ror":"https://ror.org/03prydq77","country_code":"AT","type":"education","lineage":["https://openalex.org/I129774422"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Matthias Welsch","raw_affiliation_strings":["Christian Doppler Laboratory for Molecular Informatics in the Biosciences, Department for Pharmaceutical Sciences","Department of Pharmaceutical Sciences, Faculty of Life Sciences, University of Vienna, Josef-Holaubek-Platz 2, 1090 Vienna, Austria","University of Vienna","Vienna Doctoral School of Pharmaceutical, Nutritional and Sport Sciences (PhaNuSpo)"],"raw_orcid":"https://orcid.org/0009-0007-9443-895X","affiliations":[{"raw_affiliation_string":"Christian Doppler Laboratory for Molecular Informatics in the Biosciences, Department for Pharmaceutical Sciences","institution_ids":[]},{"raw_affiliation_string":"Department of Pharmaceutical Sciences, Faculty of Life Sciences, University of Vienna, Josef-Holaubek-Platz 2, 1090 Vienna, Austria","institution_ids":[]},{"raw_affiliation_string":"University of Vienna","institution_ids":["https://openalex.org/I129774422"]},{"raw_affiliation_string":"Vienna Doctoral School of Pharmaceutical, Nutritional and Sport Sciences (PhaNuSpo)","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ellena Jiang","orcid":"https://orcid.org/0009-0008-9883-5335"},"institutions":[{"id":"https://openalex.org/I129774422","display_name":"University of Vienna","ror":"https://ror.org/03prydq77","country_code":"AT","type":"education","lineage":["https://openalex.org/I129774422"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Ellena Jiang","raw_affiliation_strings":["Department of Pharmaceutical Sciences, Faculty of Life Sciences, University of Vienna, Josef-Holaubek-Platz 2, 1090 Vienna, Austria","University of Vienna"],"raw_orcid":"https://orcid.org/0009-0008-9883-5335","affiliations":[{"raw_affiliation_string":"Department of Pharmaceutical Sciences, Faculty of Life Sciences, University of Vienna, Josef-Holaubek-Platz 2, 1090 Vienna, Austria","institution_ids":[]},{"raw_affiliation_string":"University of Vienna","institution_ids":["https://openalex.org/I129774422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080470524","display_name":"Ioannis Papantonis","orcid":"https://orcid.org/0000-0003-4282-5820"},"institutions":[{"id":"https://openalex.org/I129774422","display_name":"University of Vienna","ror":"https://ror.org/03prydq77","country_code":"AT","type":"education","lineage":["https://openalex.org/I129774422"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Ioannis Papantonis","raw_affiliation_strings":["Department of Pharmaceutical Sciences, Faculty of Life Sciences, University of Vienna, Josef-Holaubek-Platz 2, 1090 Vienna, Austria","University of Vienna"],"raw_orcid":"https://orcid.org/0000-0003-4282-5820","affiliations":[{"raw_affiliation_string":"Department of Pharmaceutical Sciences, Faculty of Life Sciences, University of Vienna, Josef-Holaubek-Platz 2, 1090 Vienna, Austria","institution_ids":[]},{"raw_affiliation_string":"University of Vienna","institution_ids":["https://openalex.org/I129774422"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065353728","display_name":"Johannes Kirchmair","orcid":"https://orcid.org/0000-0003-2667-5877"},"institutions":[{"id":"https://openalex.org/I129774422","display_name":"University of Vienna","ror":"https://ror.org/03prydq77","country_code":"AT","type":"education","lineage":["https://openalex.org/I129774422"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Johannes Kirchmair","raw_affiliation_strings":["Christian Doppler Laboratory for Molecular Informatics in the Biosciences, Department for Pharmaceutical Sciences","Department of Pharmaceutical Sciences, Faculty of Life Sciences, University of Vienna, Josef-Holaubek-Platz 2, 1090 Vienna, Austria","University of Vienna"],"raw_orcid":"https://orcid.org/0000-0003-2667-5877","affiliations":[{"raw_affiliation_string":"Christian Doppler Laboratory for Molecular Informatics in the Biosciences, Department for Pharmaceutical Sciences","institution_ids":[]},{"raw_affiliation_string":"Department of Pharmaceutical Sciences, Faculty of Life Sciences, University of Vienna, Josef-Holaubek-Platz 2, 1090 Vienna, Austria","institution_ids":[]},{"raw_affiliation_string":"University of Vienna","institution_ids":["https://openalex.org/I129774422"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5065353728"],"corresponding_institution_ids":["https://openalex.org/I129774422"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17639452,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"66","issue":"9","first_page":"5125","last_page":"5134"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9532999992370605,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6728319525718689},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4843810498714447},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41517600417137146},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34918373823165894}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6728319525718689},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4843810498714447},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41517600417137146},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34918373823165894}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016014","descriptor_name":"Linear Models","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016014","descriptor_name":"Linear Models","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1021/acs.jcim.6c00218","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.6c00218","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.6c00218?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"doi:10.26434/chemrxiv-2025-qmqdr","is_oa":true,"landing_page_url":"https://doi.org/10.26434/chemrxiv-2025-qmqdr","pdf_url":"https://chemrxiv.org/engage/api-gateway/chemrxiv/assets/orp/resource/item/6881f132728bf9025ef53fdf/original/relating-model-performance-to-embedding-distributions-in-molecular-machine-learning.pdf","source":{"id":"https://openalex.org/S4393918830","display_name":"ChemRxiv","issn_l":"2573-2293","issn":["2573-2293"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"posted-content"},{"id":"pmid:42018690","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/42018690","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:13169369","is_oa":true,"landing_page_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC13169369/","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Chem Inf Model","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1021/acs.jcim.6c00218","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.6c00218","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.6c00218?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F1267731599","display_name":"Bundesministerium f\u00fcr Arbeit und Wirtschaft","ror":null},{"id":"https://openalex.org/F4320307790","display_name":"BASF","ror":"https://ror.org/01q8f6705"},{"id":"https://openalex.org/F4320311813","display_name":"\u00d6sterreichische Nationalstiftung f\u00fcr Forschung, Technologie und Entwicklung","ror":"https://ror.org/04hb33h70"},{"id":"https://openalex.org/F4320321691","display_name":"Universit\u00e4t Wien","ror":"https://ror.org/03prydq77"},{"id":"https://openalex.org/F4320323591","display_name":"Christian Doppler Forschungsgesellschaft","ror":"https://ror.org/00mv8h305"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412697141.pdf","grobid_xml":"https://content.openalex.org/works/W4412697141.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Choosing":[0],"effective":[1],"molecular":[2,9,155],"representations":[3,156,198],"remains":[4],"a":[5,58,114,137,144],"central":[6],"challenge":[7],"in":[8,103,148],"machine":[10],"learning":[11],"and":[12,157],"is":[13,22,76,180,194,199,209],"often":[14],"addressed":[15],"through":[16],"costly":[17],"trial-and-error.":[18],"While":[19],"model":[20,45],"selection":[21],"typically":[23],"guided":[24],"solely":[25],"by":[26,39,183,216],"predictive":[27,66],"performance,":[28],"analyzing":[29],"relationships":[30],"between":[31,82],"trained":[32],"models":[33,63,107,171],"can":[34],"reveal":[35],"additional":[36],"structure":[37],"missed":[38],"performance":[40,80,95,112],"metrics.":[41],"In":[42,68,205],"terms":[43],"of":[44,220],"similarity":[46],"metrics,":[47],"representational":[48,74],"alignment":[49,55,75,91,179,193,208],"techniques,":[50],"such":[51],"as":[52],"centered":[53],"kernel":[54],"(CKA),":[56],"provide":[57],"principled":[59],"framework":[60],"for":[61],"comparing":[62],"beyond":[64],"their":[65],"performance.":[67,204],"this":[69],"work,":[70],"we":[71,87,116,129,162],"show":[72,89],"that":[73,90,142,164,167,178,191],"fundamentally":[77],"linked":[78],"to":[79,173,202],"differences":[81],"models.":[83],"For":[84],"linear":[85],"regression,":[86],"theoretically":[88],"upper-bounds":[92],"the":[93,131,149,218,221],"achievable":[94],"gaps.":[96],"This":[97],"result":[98],"predicts":[99,143],"an":[100],"exclusion":[101],"zone":[102],"which":[104],"highly":[105,169],"aligned":[106,170],"do":[108],"not":[109],"exhibit":[110,174],"large":[111],"differences,":[113],"phenomenon":[115],"empirically":[117],"validate":[118],"across":[119],"661":[120],"classification":[121],"data":[122,139,145,160,165,184],"sets.":[123],"To":[124],"make":[125],"these":[126],"insights":[127],"actionable,":[128],"introduce":[130],"mean":[132],"minimum":[133],"class":[134],"distance":[135],"(MMCD),":[136],"straightforward":[138],"set-level":[140],"statistic":[141],"set's":[146],"position":[147],"alignment-performance":[150],"difference":[151],"space.":[152],"Across":[153],"23":[154],"ten":[158],"representative":[159],"sets,":[161],"find":[163],"sets":[166],"produce":[168],"tend":[172],"low":[175],"MMCD,":[176],"suggesting":[177],"strongly":[181],"shaped":[182],"set-specific":[185],"structure.":[186],"Overall,":[187],"our":[188],"results":[189],"indicate":[190],"when":[192,207],"low,":[195],"exploring":[196],"alternative":[197],"more":[200,213],"likely":[201],"improve":[203],"contrast,":[206],"high,":[210],"gains":[211],"are":[212],"effectively":[214],"achieved":[215],"increasing":[217],"size":[219],"training":[222],"data.":[223]},"counts_by_year":[],"updated_date":"2026-06-19T17:40:00.097472","created_date":"2025-10-10T00:00:00"}
