{"id":"https://openalex.org/W2011238288","doi":"https://doi.org/10.1021/ci200303m","title":"Note on Naive Bayes Based on Binary Descriptors in Cheminformatics","display_name":"Note on Naive Bayes Based on Binary Descriptors in Cheminformatics","publication_year":2012,"publication_date":"2012-08-19","ids":{"openalex":"https://openalex.org/W2011238288","doi":"https://doi.org/10.1021/ci200303m","mag":"2011238288","pmid":"https://pubmed.ncbi.nlm.nih.gov/22900941"},"language":"en","primary_location":{"id":"doi:10.1021/ci200303m","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci200303m","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029919865","display_name":"Joe Townsend","orcid":"https://orcid.org/0000-0002-5478-0028"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Joe A. Townsend","raw_affiliation_strings":["Unilever Centre for Molecular Science Informatics, Department\rof Chemistry, University of Cambridge,\rLensfield Road, Cambridge CB2 1EW, U.K"],"affiliations":[{"raw_affiliation_string":"Unilever Centre for Molecular Science Informatics, Department\rof Chemistry, University of Cambridge,\rLensfield Road, Cambridge CB2 1EW, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014283781","display_name":"Robert C. Glen","orcid":"https://orcid.org/0000-0003-1759-2914"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Robert C. Glen","raw_affiliation_strings":["Unilever Centre for Molecular Science Informatics, Department\rof Chemistry, University of Cambridge,\rLensfield Road, Cambridge CB2 1EW, U.K","Unilever Centre for Molecular Science Informatics, Department#R#of Chemistry, University of Cambridge,#R#Lensfield Road, Cambridge CB2 1EW, U.K"],"affiliations":[{"raw_affiliation_string":"Unilever Centre for Molecular Science Informatics, Department\rof Chemistry, University of Cambridge,\rLensfield Road, Cambridge CB2 1EW, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"Unilever Centre for Molecular Science Informatics, Department#R#of Chemistry, University of Cambridge,#R#Lensfield Road, Cambridge CB2 1EW, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001996295","display_name":"Hamse Y. Mussa","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hamse Y. Mussa","raw_affiliation_strings":["Unilever Centre for Molecular Science Informatics, Department\rof Chemistry, University of Cambridge,\rLensfield Road, Cambridge CB2 1EW, U.K","Unilever Centre for Molecular Science Informatics, Department#R#of Chemistry, University of Cambridge,#R#Lensfield Road, Cambridge CB2 1EW, U.K"],"affiliations":[{"raw_affiliation_string":"Unilever Centre for Molecular Science Informatics, Department\rof Chemistry, University of Cambridge,\rLensfield Road, Cambridge CB2 1EW, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"Unilever Centre for Molecular Science Informatics, Department#R#of Chemistry, University of Cambridge,#R#Lensfield Road, Cambridge CB2 1EW, U.K","institution_ids":["https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5029919865"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":1.3478,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.80423395,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"52","issue":"10","first_page":"2494","last_page":"2500"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10640","display_name":"Spectroscopy and Chemometric Analyses","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1602","display_name":"Analytical Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9632999897003174,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cheminformatics","display_name":"Cheminformatics","score":0.9660494327545166},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.8085999488830566},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6228705048561096},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5871906280517578},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.5752394199371338},{"id":"https://openalex.org/keywords/chemical-space","display_name":"Chemical space","score":0.5730858445167542},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5294175744056702},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5023884773254395},{"id":"https://openalex.org/keywords/molecular-descriptor","display_name":"Molecular descriptor","score":0.4770711660385132},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4761131703853607},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.475536584854126},{"id":"https://openalex.org/keywords/bayes-theorem","display_name":"Bayes' theorem","score":0.47162342071533203},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43367859721183777},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.42119234800338745},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.41855114698410034},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.30074378848075867},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.29372644424438477},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.22871321439743042},{"id":"https://openalex.org/keywords/quantitative-structure\u2013activity-relationship","display_name":"Quantitative structure\u2013activity relationship","score":0.1737147569656372},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.16558045148849487},{"id":"https://openalex.org/keywords/drug-discovery","display_name":"Drug discovery","score":0.1554805040359497},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1255185604095459}],"concepts":[{"id":"https://openalex.org/C68762167","wikidata":"https://www.wikidata.org/wiki/Q910164","display_name":"Cheminformatics","level":2,"score":0.9660494327545166},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.8085999488830566},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6228705048561096},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5871906280517578},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.5752394199371338},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.5730858445167542},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5294175744056702},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5023884773254395},{"id":"https://openalex.org/C164923092","wikidata":"https://www.wikidata.org/wiki/Q3705921","display_name":"Molecular descriptor","level":3,"score":0.4770711660385132},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4761131703853607},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.475536584854126},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.47162342071533203},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43367859721183777},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42119234800338745},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.41855114698410034},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.30074378848075867},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29372644424438477},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.22871321439743042},{"id":"https://openalex.org/C164126121","wikidata":"https://www.wikidata.org/wiki/Q766383","display_name":"Quantitative structure\u2013activity relationship","level":2,"score":0.1737147569656372},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.16558045148849487},{"id":"https://openalex.org/C74187038","wikidata":"https://www.wikidata.org/wiki/Q1418791","display_name":"Drug discovery","level":2,"score":0.1554805040359497},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1255185604095459},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001499","descriptor_name":"Bayes Theorem","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001499","descriptor_name":"Bayes Theorem","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001499","descriptor_name":"Bayes Theorem","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001688","descriptor_name":"Biological Products","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":false},{"descriptor_ui":"D001688","descriptor_name":"Biological Products","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":false},{"descriptor_ui":"D001688","descriptor_name":"Biological Products","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":false},{"descriptor_ui":"D001688","descriptor_name":"Biological Products","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D001688","descriptor_name":"Biological Products","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D001688","descriptor_name":"Biological Products","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D004791","descriptor_name":"Enzyme Inhibitors","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":false},{"descriptor_ui":"D004791","descriptor_name":"Enzyme Inhibitors","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":false},{"descriptor_ui":"D004791","descriptor_name":"Enzyme Inhibitors","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":false},{"descriptor_ui":"D004791","descriptor_name":"Enzyme Inhibitors","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D004791","descriptor_name":"Enzyme Inhibitors","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D004791","descriptor_name":"Enzyme Inhibitors","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013329","descriptor_name":"Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013329","descriptor_name":"Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013329","descriptor_name":"Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D048088","descriptor_name":"Informatics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D048088","descriptor_name":"Informatics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D048088","descriptor_name":"Informatics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1021/ci200303m","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci200303m","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:22900941","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/22900941","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1973420497","https://openalex.org/W1974041968","https://openalex.org/W2005446291","https://openalex.org/W2009086942","https://openalex.org/W2019044526","https://openalex.org/W2020756525","https://openalex.org/W2024432350","https://openalex.org/W2030087063","https://openalex.org/W2043909051","https://openalex.org/W2062145093","https://openalex.org/W2065160746","https://openalex.org/W2084134149","https://openalex.org/W2112912103","https://openalex.org/W2118020555","https://openalex.org/W2158698691","https://openalex.org/W2200017991","https://openalex.org/W2318405211","https://openalex.org/W2409278974"],"related_works":["https://openalex.org/W4382246684","https://openalex.org/W4387845407","https://openalex.org/W2902857455","https://openalex.org/W4207004502","https://openalex.org/W4212982662","https://openalex.org/W2160244398","https://openalex.org/W2907885506","https://openalex.org/W2022444336","https://openalex.org/W4207063555","https://openalex.org/W3042998478"],"abstract_inverted_index":{"A":[0],"plethora":[1],"of":[2,37,107,115],"articles":[3],"on":[4,49,111],"naive":[5,45],"Bayes":[6,46],"classifiers,":[7],"where":[8],"the":[9,27,31,68,105,108,116],"chemical":[10],"compounds":[11,93],"to":[12,41],"be":[13,54],"classified":[14],"are":[15,95],"represented":[16],"by":[17],"binary-valued":[18],"(absent":[19],"or":[20],"present":[21],"type)":[22],"descriptors,":[23],"have":[24],"appeared":[25],"in":[26,60,77,98],"cheminformatics":[28],"literature":[29],"over":[30],"past":[32],"decade.":[33],"The":[34],"principal":[35],"goal":[36],"this":[38],"paper":[39],"is":[40,82,87],"describe":[42],"how":[43],"a":[44,57,73,83,113,120],"classifier":[47,85],"based":[48],"binary":[50,100],"descriptors":[51],"(NBCBBD)":[52],"can":[53],"employed":[55],"as":[56],"feature":[58],"selector":[59],"an":[61],"efficient":[62],"manner":[63],"suitable":[64],"for":[65,91],"cheminformatics.":[66],"In":[67],"process,":[69],"we":[70],"point":[71],"out":[72],"fact":[74],"well":[75],"documented":[76],"other":[78],"disciplines":[79],"that":[80,94],"NBCBBD":[81],"linear":[84],"and":[86,128],"therefore":[88],"intrinsically":[89],"suboptimal":[90],"classifying":[92,112],"nonlinearly":[96],"separable":[97],"their":[99],"descriptor":[101],"space.":[102],"We":[103],"investigate":[104],"performance":[106],"proposed":[109],"algorithm":[110],"subset":[114],"MDDR":[117],"data":[118,124],"set,":[119,125],"standard":[121],"molecular":[122],"benchmark":[123],"into":[126],"active":[127],"inactive":[129],"compounds.":[130]},"counts_by_year":[{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2}],"updated_date":"2026-02-24T19:35:01.260952","created_date":"2025-10-10T00:00:00"}
