{"id":"https://openalex.org/W2079781933","doi":"https://doi.org/10.1021/ci700193u","title":"Data Mining a Small Molecule Drug Screening Representative Subset from NIH PubChem","display_name":"Data Mining a Small Molecule Drug Screening Representative Subset from NIH PubChem","publication_year":2008,"publication_date":"2008-02-27","ids":{"openalex":"https://openalex.org/W2079781933","doi":"https://doi.org/10.1021/ci700193u","mag":"2079781933","pmid":"https://pubmed.ncbi.nlm.nih.gov/18302356"},"language":"en","primary_location":{"id":"doi:10.1021/ci700193u","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci700193u","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028011485","display_name":"Xiang\u2010Qun Xie","orcid":"https://orcid.org/0000-0002-6881-6175"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]},{"id":"https://openalex.org/I2799424032","display_name":"Discovery Institute","ror":"https://ror.org/05t8s3y29","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I2799424032"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiang-Qun Xie","raw_affiliation_strings":["Department of Pharmaceutical Sciences, School of Pharmacy, Pittsburgh Molecular Library Screening Center, Drug Discovery Institute, Pittsburgh, Pennsylvania 15260, and Departments of Computational Biology and Structure Biology, University of Pittsburgh, Pittsburgh, Pennsylvania 15260"],"affiliations":[{"raw_affiliation_string":"Department of Pharmaceutical Sciences, School of Pharmacy, Pittsburgh Molecular Library Screening Center, Drug Discovery Institute, Pittsburgh, Pennsylvania 15260, and Departments of Computational Biology and Structure Biology, University of Pittsburgh, Pittsburgh, Pennsylvania 15260","institution_ids":["https://openalex.org/I2799424032","https://openalex.org/I170201317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100709936","display_name":"Jianzhong Chen","orcid":"https://orcid.org/0000-0002-8232-7298"},"institutions":[{"id":"https://openalex.org/I2799424032","display_name":"Discovery Institute","ror":"https://ror.org/05t8s3y29","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I2799424032"]},{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jian-Zhong Chen","raw_affiliation_strings":["Department of Pharmaceutical Sciences, School of Pharmacy, Pittsburgh Molecular Library Screening Center, Drug Discovery Institute, Pittsburgh, Pennsylvania 15260, and Departments of Computational Biology and Structure Biology, University of Pittsburgh, Pittsburgh, Pennsylvania 15260"],"affiliations":[{"raw_affiliation_string":"Department of Pharmaceutical Sciences, School of Pharmacy, Pittsburgh Molecular Library Screening Center, Drug Discovery Institute, Pittsburgh, Pennsylvania 15260, and Departments of Computational Biology and Structure Biology, University of Pittsburgh, Pittsburgh, Pennsylvania 15260","institution_ids":["https://openalex.org/I2799424032","https://openalex.org/I170201317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5028011485"],"corresponding_institution_ids":["https://openalex.org/I170201317","https://openalex.org/I2799424032"],"apc_list":null,"apc_paid":null,"fwci":8.8116,"has_fulltext":false,"cited_by_count":70,"citation_normalized_percentile":{"value":0.9808542,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"48","issue":"3","first_page":"465","last_page":"475"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10274","display_name":"Synthesis and biological activity","score":0.9843000173568726,"subfield":{"id":"https://openalex.org/subfields/1605","display_name":"Organic Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12023","display_name":"Cholinesterase and Neurodegenerative Diseases","score":0.9632999897003174,"subfield":{"id":"https://openalex.org/subfields/2736","display_name":"Pharmacology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pubchem","display_name":"PubChem","score":0.9879873991012573},{"id":"https://openalex.org/keywords/cheminformatics","display_name":"Cheminformatics","score":0.8890581130981445},{"id":"https://openalex.org/keywords/chemical-space","display_name":"Chemical space","score":0.7655894160270691},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6117297410964966},{"id":"https://openalex.org/keywords/chemical-database","display_name":"Chemical database","score":0.570031464099884},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5439146161079407},{"id":"https://openalex.org/keywords/virtual-screening","display_name":"Virtual screening","score":0.5150629878044128},{"id":"https://openalex.org/keywords/chembl","display_name":"chEMBL","score":0.50983065366745},{"id":"https://openalex.org/keywords/drug-discovery","display_name":"Drug discovery","score":0.4694475829601288},{"id":"https://openalex.org/keywords/in-silico","display_name":"In silico","score":0.4382934868335724},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4333137273788452},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.36160096526145935},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.30003949999809265},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.2519042491912842},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1383259892463684}],"concepts":[{"id":"https://openalex.org/C158180186","wikidata":"https://www.wikidata.org/wiki/Q278487","display_name":"PubChem","level":2,"score":0.9879873991012573},{"id":"https://openalex.org/C68762167","wikidata":"https://www.wikidata.org/wiki/Q910164","display_name":"Cheminformatics","level":2,"score":0.8890581130981445},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.7655894160270691},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6117297410964966},{"id":"https://openalex.org/C203394866","wikidata":"https://www.wikidata.org/wiki/Q2881060","display_name":"Chemical database","level":2,"score":0.570031464099884},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5439146161079407},{"id":"https://openalex.org/C103697762","wikidata":"https://www.wikidata.org/wiki/Q4112105","display_name":"Virtual screening","level":3,"score":0.5150629878044128},{"id":"https://openalex.org/C63222358","wikidata":"https://www.wikidata.org/wiki/Q6120337","display_name":"chEMBL","level":3,"score":0.50983065366745},{"id":"https://openalex.org/C74187038","wikidata":"https://www.wikidata.org/wiki/Q1418791","display_name":"Drug discovery","level":2,"score":0.4694475829601288},{"id":"https://openalex.org/C2775905019","wikidata":"https://www.wikidata.org/wiki/Q192572","display_name":"In silico","level":3,"score":0.4382934868335724},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4333137273788452},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.36160096526145935},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.30003949999809265},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.2519042491912842},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1383259892463684},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D004364","descriptor_name":"Pharmaceutical Preparations","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D004364","descriptor_name":"Pharmaceutical Preparations","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D004364","descriptor_name":"Pharmaceutical Preparations","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009316","descriptor_name":"National Institutes of Health (U.S.)","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009316","descriptor_name":"National Institutes of Health (U.S.)","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009316","descriptor_name":"National Institutes of Health (U.S.)","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D014481","descriptor_name":"United States","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D014481","descriptor_name":"United States","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D014481","descriptor_name":"United States","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D039781","descriptor_name":"PubMed","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D039781","descriptor_name":"PubMed","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D039781","descriptor_name":"PubMed","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1021/ci700193u","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci700193u","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:18302356","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/18302356","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1594341127","https://openalex.org/W1605923270","https://openalex.org/W1977331780","https://openalex.org/W1980956598","https://openalex.org/W1990451437","https://openalex.org/W1996633695","https://openalex.org/W2000628228","https://openalex.org/W2001633251","https://openalex.org/W2004192296","https://openalex.org/W2021229217","https://openalex.org/W2040389962","https://openalex.org/W2046403479","https://openalex.org/W2060907307","https://openalex.org/W2062520309","https://openalex.org/W2096729078","https://openalex.org/W2104553035","https://openalex.org/W2117170820","https://openalex.org/W2160114756","https://openalex.org/W4236248134","https://openalex.org/W4248107770"],"related_works":["https://openalex.org/W2770765812","https://openalex.org/W4386823632","https://openalex.org/W2001424887","https://openalex.org/W2676236916","https://openalex.org/W164221359","https://openalex.org/W4207063555","https://openalex.org/W2780675302","https://openalex.org/W4294024451","https://openalex.org/W2151971404","https://openalex.org/W2079781933"],"abstract_inverted_index":{"PubChem":[0,38,92,275],"is":[1,11],"a":[2,12,54,75,84,97,165,202,245],"scientific":[3,35,247],"showcase":[4],"of":[5,45,57,61,63,68,179,187,225],"the":[6,24,34,58,66,69,90,112,157,180,220,227,274],"NIH":[7,25],"Roadmap":[8],"Initiatives.":[9],"It":[10,48],"compound":[13,46,128,159,205,271],"repository":[14],"created":[15],"to":[16,52,73,82,125,244],"facilitate":[17],"information":[18],"exchange":[19],"and":[20,33,86,121,134,144,175,183,212,267],"data":[21,76,268],"sharing":[22],"among":[23],"Roadmap-funded":[26],"Molecular":[27],"Library":[28],"Screening":[29],"Center":[30],"Network":[31],"(MLSCN)":[32],"community.":[36],"However,":[37],"has":[39,164],"more":[40],"than":[41],"10":[42],"million":[43,192],"records":[44],"information.":[47],"will":[49,240],"be":[50,201,242],"challenging":[51],"conduct":[53],"drug":[55,216,257],"screening":[56,211],"whole":[59],"database":[60],"millions":[62],"compounds.":[64],"Thus,":[65],"purpose":[67],"present":[70],"study":[71],"was":[72,104,119,122,141],"develop":[74],"mining":[77,269],"cheminformatics":[78],"approach":[79],"in":[80,154,208,213,250,277],"order":[81],"construct":[83],"representative":[85,101,117,196,260],"structure-diverse":[87,204],"sublibrary":[88],"from":[89],"large":[91,270],"database.":[93],"In":[94,218],"this":[95],"study,":[96],"new":[98,139,162,195],"chemical":[99],"diverse":[100,253],"subset,":[102],"rePubChem,":[103],"selected":[105],"by":[106,127],"whole-molecule":[107],"chemistry-space":[108,230],"matrix":[109],"calculation":[110],"using":[111,226],"cell-based":[113,229],"partition":[114,231],"algorithm.":[115],"The":[116,138,161,194],"subset":[118,140,163,197,222],"generated":[120,199],"then":[123],"subjected":[124],"evaluations":[126],"property":[129],"analyses":[130],"based":[131,149],"on":[132,150],"1D":[133],"2D":[135,151,235],"molecular":[136,152,185],"descriptors.":[137],"also":[142,241],"examined":[143],"assessed":[145],"for":[146,207,255,265],"self-similarity":[147],"analysis":[148],"fingerprints":[153],"comparing":[155],"with":[156,172,233],"source":[158],"library.":[160],"much":[166],"smaller":[167],"library":[168,190,198,276],"size":[169],"(540K":[170],"compounds)":[171],"minimum":[173],"similarity":[174,237],"redundancy":[176],"without":[177],"loss":[178],"structural":[181],"diversity":[182],"basic":[184],"properties":[186],"its":[188],"parent":[189],"(5.3":[191],"compounds).":[193],"could":[200],"valuable":[203],"resource":[206],"silico":[209],"virtual":[210,261],"vitro":[214],"HTS":[215],"screening.":[217],"addition,":[219],"established":[221],"generation":[223],"method":[224],"combined":[228],"metrics":[232],"pairwised":[234],"fingerprint-based":[236],"search":[238],"approaches":[239],"important":[243],"broad":[246],"community":[248],"interested":[249],"acquiring":[251],"structurally":[252],"compounds":[254],"efficient":[256],"screening,":[258],"building":[259],"combinatorial":[262],"chemistry":[263],"libraries":[264],"syntheses,":[266],"databases":[272],"like":[273],"general.":[278]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":9}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
