{"id":"https://openalex.org/W2159489774","doi":"https://doi.org/10.1186/1758-2946-5-15","title":"Mining basic active structures from a large-scale database","display_name":"Mining basic active structures from a large-scale database","publication_year":2013,"publication_date":"2013-03-16","ids":{"openalex":"https://openalex.org/W2159489774","doi":"https://doi.org/10.1186/1758-2946-5-15","mag":"2159489774","pmid":"https://pubmed.ncbi.nlm.nih.gov/23497729"},"language":"en","primary_location":{"id":"doi:10.1186/1758-2946-5-15","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-5-15","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/1758-2946-5-15","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/1758-2946-5-15","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112064391","display_name":"Naoto TAKADA","orcid":null},"institutions":[{"id":"https://openalex.org/I206011266","display_name":"Kwansei Gakuin University","ror":"https://ror.org/02qf2tx24","country_code":"JP","type":"education","lineage":["https://openalex.org/I206011266"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Naoto Takada","raw_affiliation_strings":["School of Science & Technology, Kwansei Gakuin University, 2-1 Gakuen, Sanda, Hyogo, 669-1337, Japan. okada@dm-lab.info","School of Science & Technology, Kwansei Gakuin University, 2-1 Gakuen, Sanda, Hyogo, 669-1337, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Science & Technology, Kwansei Gakuin University, 2-1 Gakuen, Sanda, Hyogo, 669-1337, Japan. okada@dm-lab.info","institution_ids":[]},{"raw_affiliation_string":"School of Science & Technology, Kwansei Gakuin University, 2-1 Gakuen, Sanda, Hyogo, 669-1337, Japan","institution_ids":["https://openalex.org/I206011266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049133040","display_name":"Norihito Ohmori","orcid":null},"institutions":[{"id":"https://openalex.org/I206011266","display_name":"Kwansei Gakuin University","ror":"https://ror.org/02qf2tx24","country_code":"JP","type":"education","lineage":["https://openalex.org/I206011266"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Norihito Ohmori","raw_affiliation_strings":["School of Science & Technology, Kwansei Gakuin University, 2-1 Gakuen, Sanda, Hyogo, 669-1337, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Science & Technology, Kwansei Gakuin University, 2-1 Gakuen, Sanda, Hyogo, 669-1337, Japan","institution_ids":["https://openalex.org/I206011266"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064919780","display_name":"Takashi Okada","orcid":"https://orcid.org/0000-0002-2910-3797"},"institutions":[{"id":"https://openalex.org/I206011266","display_name":"Kwansei Gakuin University","ror":"https://ror.org/02qf2tx24","country_code":"JP","type":"education","lineage":["https://openalex.org/I206011266"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takashi Okada","raw_affiliation_strings":["School of Science & Technology, Kwansei Gakuin University, 2-1 Gakuen, Sanda, Hyogo, 669-1337, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Science & Technology, Kwansei Gakuin University, 2-1 Gakuen, Sanda, Hyogo, 669-1337, Japan","institution_ids":["https://openalex.org/I206011266"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112064391"],"corresponding_institution_ids":["https://openalex.org/I206011266"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":0.7447,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.75590826,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"5","issue":"1","first_page":"15","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9397000074386597,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9397000074386597,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10911","display_name":"Chemical Synthesis and Analysis","score":0.010900000110268593,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11178","display_name":"Receptor Mechanisms and Signaling","score":0.00860000029206276,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pubchem","display_name":"PubChem","score":0.9749171733856201},{"id":"https://openalex.org/keywords/bass","display_name":"Bass (fish)","score":0.6221557855606079},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6101738214492798},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5680713653564453},{"id":"https://openalex.org/keywords/drug-discovery","display_name":"Drug discovery","score":0.5439091324806213},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.46491605043411255},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.39758729934692383},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.3863186240196228},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.21801817417144775},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.18738117814064026}],"concepts":[{"id":"https://openalex.org/C158180186","wikidata":"https://www.wikidata.org/wiki/Q278487","display_name":"PubChem","level":2,"score":0.9749171733856201},{"id":"https://openalex.org/C2777182073","wikidata":"https://www.wikidata.org/wiki/Q1224135","display_name":"Bass (fish)","level":2,"score":0.6221557855606079},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6101738214492798},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5680713653564453},{"id":"https://openalex.org/C74187038","wikidata":"https://www.wikidata.org/wiki/Q1418791","display_name":"Drug discovery","level":2,"score":0.5439091324806213},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.46491605043411255},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.39758729934692383},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3863186240196228},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.21801817417144775},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.18738117814064026},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/1758-2946-5-15","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-5-15","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/1758-2946-5-15","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:23497729","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/23497729","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:europepmc.org:2619946","is_oa":true,"landing_page_url":"http://europepmc.org/articles/PMC3618305","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:3618305","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/3618305","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/1758-2946-5-15","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1758-2946-5-15","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/1758-2946-5-15","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2159489774.pdf","grobid_xml":"https://content.openalex.org/works/W2159489774.grobid-xml"},"referenced_works_count":8,"referenced_works":["https://openalex.org/W71547456","https://openalex.org/W1835740130","https://openalex.org/W2025633854","https://openalex.org/W2082299245","https://openalex.org/W2087400133","https://openalex.org/W2112208320","https://openalex.org/W2125335678","https://openalex.org/W2159444576"],"related_works":["https://openalex.org/W2104366126","https://openalex.org/W2547231910","https://openalex.org/W2235675062","https://openalex.org/W2177317049","https://openalex.org/W4293208996","https://openalex.org/W2169546341","https://openalex.org/W2127553917","https://openalex.org/W2085398335","https://openalex.org/W2811096937","https://openalex.org/W2008505552"],"abstract_inverted_index":{"BACKGROUND:":[0],"The":[1,22,125,153,239,253,264],"Pubchem":[2],"Database":[3],"is":[4,34,284],"a":[5,75,275],"large-scale":[6],"resource":[7],"for":[8,94],"chemical":[9,14,251],"information,":[10],"containing":[11,175],"millions":[12],"of":[13,32,81,120,135,155,172,186,201,250],"compound":[15,91],"activities":[16,213],"derived":[17],"by":[18,115,259],"high-throughput":[19],"screening":[20],"(HTS).":[21],"ability":[23],"to":[24,70,74,130,183,196,207,223,271,288],"extract":[25,272],"characteristic":[26],"substructures":[27,246],"from":[28,59,247],"such":[29],"enormous":[30],"amounts":[31],"data":[33,148],"steadily":[35],"growing":[36],"in":[37,214,226],"importance.":[38],"Compounds":[39],"with":[40,274],"shared":[41],"basic":[42],"active":[43,82,90,121,173],"structures":[44,193],"(BASs)":[45],"exhibiting":[46],"G-protein":[47],"coupled":[48],"receptor":[49],"(GPCR)":[50],"activity":[51],"and":[52,83,122,147,159,210,222,234],"repeated":[53,129],"dose":[54],"toxicity":[55],"have":[56],"been":[57],"mined":[58],"small":[60],"datasets.":[61],"However,":[62],"the":[63,79,132,136,156,164,170,187,198,215,227,280],"mining":[64,100,241,265],"process":[65,127],"employed":[66],"was":[67,113,128,151,205],"not":[68],"applicable":[69],"large":[71,76,248,290],"datasets":[72,249],"owing":[73],"imbalance":[77],"between":[78],"numbers":[80,107],"inactive":[84,97,123,137],"compounds.":[85,98,124,138],"In":[86],"most":[87],"datasets,":[88],"one":[89],"will":[92],"appear":[93],"every":[95],"1000":[96],"Most":[99],"techniques":[101],"work":[102],"well":[103],"only":[104],"when":[105],"these":[106,176],"are":[108],"similar.":[109],"RESULTS:":[110],"This":[111,203],"difficulty":[112],"overcome":[114],"sampling":[116,126,146,168],"an":[117,260,285],"equal":[118],"number":[119],"maintain":[131],"structural":[133,161],"diversity":[134],"An":[139],"interactive":[140],"KNIME":[141],"workflow":[142],"that":[143],"enabled":[144],"effective":[145,286],"cleaning":[149],"processes":[150],"created.":[152],"application":[154],"cascade":[157],"model":[158],"subsequent":[160],"refinement":[162],"yielded":[163],"BAS":[165],"candidates.":[166],"Repeated":[167],"increased":[169],"ratio":[171],"compounds":[174],"substructures.":[177],"Three":[178],"samplings":[179],"were":[180,194,256],"deemed":[181,257],"adequate":[182],"identify":[184],"all":[185],"meaningful":[188,245],"BASs.":[189,202],"BASs":[190,255,273],"expressing":[191],"similar":[192],"grouped":[195],"give":[197],"final":[199],"set":[200],"method":[204,281],"applied":[206],"HIV":[208],"integrase":[209],"protease":[211],"inhibitor":[212],"MDL":[216],"Drug":[217],"Data":[218],"Report":[219],"(MDDR)":[220],"database":[221],"procaspase-3":[224],"activators":[225],"PubChem":[228],"BioAssay":[229],"database,":[230],"yielding":[231],"14,":[232],"12,":[233],"18":[235],"BASs,":[236],"respectively.":[237],"CONCLUSIONS:":[238],"proposed":[240],"scheme":[242],"successfully":[243],"extracted":[244],"structures.":[252],"resulting":[254],"reasonable":[258],"experienced":[261],"medicinal":[262],"chemist.":[263],"itself":[266],"requires":[267],"about":[268],"3":[269],"days":[270],"given":[276],"physiological":[277],"activity.":[278],"Thus,":[279],"described":[282],"herein":[283],"way":[287],"analyze":[289],"HTS":[291],"databases.":[292]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
