{"id":"https://openalex.org/W2900415334","doi":"https://doi.org/10.1021/acs.jcim.8b00597","title":"Molecular Similarity-Based Domain Applicability Metric Efficiently Identifies Out-of-Domain Compounds","display_name":"Molecular Similarity-Based Domain Applicability Metric Efficiently Identifies Out-of-Domain Compounds","publication_year":2018,"publication_date":"2018-11-07","ids":{"openalex":"https://openalex.org/W2900415334","doi":"https://doi.org/10.1021/acs.jcim.8b00597","mag":"2900415334","pmid":"https://pubmed.ncbi.nlm.nih.gov/30404432"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.8b00597","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.8b00597","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100738736","display_name":"Ruifeng Liu","orcid":"https://orcid.org/0000-0001-7582-9217"},"institutions":[{"id":"https://openalex.org/I2800986571","display_name":"Telemedicine & Advanced Technology Research Center","ror":"https://ror.org/014pvr265","country_code":"US","type":"other","lineage":["https://openalex.org/I106965489","https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I2800986571","https://openalex.org/I2802581893"]},{"id":"https://openalex.org/I2802581893","display_name":"United States Army Medical Research and Development Command","ror":"https://ror.org/03cd02q50","country_code":"US","type":"facility","lineage":["https://openalex.org/I106965489","https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I2802581893"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ruifeng Liu","raw_affiliation_strings":["Department of Defense Biotechnology High Performance Computing Software Applications Institute, Telemedicine and Advanced Technology Research Center, U.S. Army Medical Research and Materiel Command, Fort Detrick, Maryland 21702, United States"],"affiliations":[{"raw_affiliation_string":"Department of Defense Biotechnology High Performance Computing Software Applications Institute, Telemedicine and Advanced Technology Research Center, U.S. Army Medical Research and Materiel Command, Fort Detrick, Maryland 21702, United States","institution_ids":["https://openalex.org/I2802581893","https://openalex.org/I2800986571"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030271505","display_name":"Anders Wallqvist","orcid":"https://orcid.org/0000-0002-9775-7469"},"institutions":[{"id":"https://openalex.org/I2800986571","display_name":"Telemedicine & Advanced Technology Research Center","ror":"https://ror.org/014pvr265","country_code":"US","type":"other","lineage":["https://openalex.org/I106965489","https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I2800986571","https://openalex.org/I2802581893"]},{"id":"https://openalex.org/I2802581893","display_name":"United States Army Medical Research and Development Command","ror":"https://ror.org/03cd02q50","country_code":"US","type":"facility","lineage":["https://openalex.org/I106965489","https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I2802581893"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Anders Wallqvist","raw_affiliation_strings":["Department of Defense Biotechnology High Performance Computing Software Applications Institute, Telemedicine and Advanced Technology Research Center, U.S. Army Medical Research and Materiel Command, Fort Detrick, Maryland 21702, United States"],"affiliations":[{"raw_affiliation_string":"Department of Defense Biotechnology High Performance Computing Software Applications Institute, Telemedicine and Advanced Technology Research Center, U.S. Army Medical Research and Materiel Command, Fort Detrick, Maryland 21702, United States","institution_ids":["https://openalex.org/I2802581893","https://openalex.org/I2800986571"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5030271505","https://openalex.org/A5100738736"],"corresponding_institution_ids":["https://openalex.org/I2800986571","https://openalex.org/I2802581893"],"apc_list":null,"apc_paid":null,"fwci":4.4884,"has_fulltext":false,"cited_by_count":74,"citation_normalized_percentile":{"value":0.95565286,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"59","issue":"1","first_page":"181","last_page":"189"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9366000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.7313022613525391},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.7196024656295776},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.7162333130836487},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4967494606971741},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.4226161241531372},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3584780693054199},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2756820321083069},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24842378497123718},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1746962070465088},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12316542863845825}],"concepts":[{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.7313022613525391},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.7196024656295776},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.7162333130836487},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4967494606971741},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.4226161241531372},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3584780693054199},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2756820321083069},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24842378497123718},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1746962070465088},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12316542863845825},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015394","descriptor_name":"Molecular Structure","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015394","descriptor_name":"Molecular Structure","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015394","descriptor_name":"Molecular Structure","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D021281","descriptor_name":"Quantitative Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D021281","descriptor_name":"Quantitative Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D021281","descriptor_name":"Quantitative Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D062126","descriptor_name":"Databases, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D062126","descriptor_name":"Databases, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D062126","descriptor_name":"Databases, Chemical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1021/acs.jcim.8b00597","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.8b00597","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:30404432","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/30404432","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5851990620","display_name":null,"funder_award_id":"CBCall14-CBS-05-2-0007","funder_id":"https://openalex.org/F4320332186","funder_display_name":"Defense Threat Reduction Agency"}],"funders":[{"id":"https://openalex.org/F4320332186","display_name":"Defense Threat Reduction Agency","ror":"https://ror.org/04tz64554"},{"id":"https://openalex.org/F4320338280","display_name":"Medical Research and Materiel Command","ror":"https://ror.org/03cd02q50"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1972234779","https://openalex.org/W1988037271","https://openalex.org/W1990399577","https://openalex.org/W2001459151","https://openalex.org/W2042278642","https://openalex.org/W2043990744","https://openalex.org/W2048611698","https://openalex.org/W2062536539","https://openalex.org/W2076498053","https://openalex.org/W2085890279","https://openalex.org/W2089578131","https://openalex.org/W2090846279","https://openalex.org/W2099071242","https://openalex.org/W2118981984","https://openalex.org/W2248442301","https://openalex.org/W2254686952","https://openalex.org/W2810048461","https://openalex.org/W4246968718"],"related_works":["https://openalex.org/W2375480909","https://openalex.org/W2353314428","https://openalex.org/W2012019886","https://openalex.org/W2166090428","https://openalex.org/W2381021552","https://openalex.org/W2354749003","https://openalex.org/W2377121353","https://openalex.org/W2350529538","https://openalex.org/W2076713575","https://openalex.org/W2327130486"],"abstract_inverted_index":{"Domain":[0],"applicability":[1],"(DA)":[2],"is":[3,21,25],"a":[4,123,142,176,180,184,190],"concept":[5],"introduced":[6],"to":[7,42,86,102,106,116],"gauge":[8],"the":[9,28,53,65,87,92,107,131,139,152,194,200,211,219,230,247,259],"reliability":[10,140],"of":[11,30,35,55,67,133,141,147,166,186,193,199,274],"quantitative":[12],"structure-activity":[13],"relationship":[14],"(QSAR)":[15],"predictions.":[16],"A":[17],"leading":[18],"DA":[19,127,232,261,265],"metric":[20,40,69,93,128,233],"ensemble":[22,34,79,113,236,253,273],"variance,":[23],"which":[24,151,267],"defined":[26],"as":[27,82],"variance":[29,80,114,237,254],"predictions":[31,146,217,240],"by":[32],"an":[33,272],"QSAR":[36,275],"models.":[37],"However,":[38,91],"this":[39,61,68],"fails":[41,115],"identify":[43,117],"large":[44,56,118],"prediction":[45,119,143],"errors":[46,153],"in":[47,137,238],"melting":[48],"point":[49],"(MP)":[50],"data,":[51],"despite":[52],"availability":[54],"training":[57,88,108,135,177,201,212],"data":[58,72,149,164,173,213,226],"sets.":[59],"In":[60,121],"study,":[62],"we":[63,160],"examined":[64],"performance":[66],"on":[70,210,271],"MP":[71,148],"and":[73,179,214],"found":[74],"that,":[75],"for":[76,95,150,218,241],"most":[77],"molecules,":[78,97],"increased":[81],"their":[83],"structural":[84,104],"similarity":[85,105],"molecules":[89,99,136],"decreased.":[90],"decreased":[94],"\"out-of-domain\"":[96],"i.e.,":[98],"with":[100,252,280],"little":[101],"no":[103],"compounds.":[109,243],"This":[110],"explains":[111],"why":[112],"errors.":[120],"contrast,":[122],"new":[124,231,260,264],"molecular":[125,168],"similarity-based":[126],"that":[129,229],"considers":[130],"contributions":[132],"all":[134],"gauging":[138],"successfully":[144],"identified":[145],"were":[154],"large.":[155],"To":[156],"validate":[157],"our":[158],"results,":[159],"used":[161],"four":[162],"additional":[163],"sets":[165,227],"diverse":[167],"properties.":[169],"We":[170,203],"divided":[171],"each":[172],"set":[174,178,182,221],"into":[175],"test":[181,195,220],"at":[183],"ratio":[185],"approximately":[187],"2:1,":[188],"ensuring":[189],"small":[191],"fraction":[192],"compounds":[196],"are":[197],"out":[198],"domain.":[202],"then":[204],"trained":[205],"random":[206],"forest":[207],"(RF)":[208],"models":[209],"made":[215],"RF":[216],"molecules.":[222],"Results":[223],"from":[224],"these":[225],"confirm":[228],"significantly":[234],"outperformed":[235],"identifying":[239],"out-of-domain":[242],"For":[244],"within-domain":[245],"compounds,":[246],"two":[248],"metrics":[249],"performed":[250],"similarly,":[251],"marginally":[255],"but":[256],"consistently":[257],"outperforming":[258],"metric.":[262],"The":[263],"metric,":[266],"does":[268],"not":[269],"rely":[270],"models,":[276],"can":[277],"be":[278],"deployed":[279],"any":[281],"machine-learning":[282],"method,":[283],"including":[284],"deep":[285],"neural":[286],"networks.":[287]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":7}],"updated_date":"2026-04-07T14:57:38.498316","created_date":"2025-10-10T00:00:00"}
