{"id":"https://openalex.org/W2215622313","doi":"https://doi.org/10.1021/ci034254q","title":"Data Mining and Machine Learning Techniques for the Identification of Mutagenicity Inducing Substructures and Structure Activity Relationships of Noncongeneric Compounds","display_name":"Data Mining and Machine Learning Techniques for the Identification of Mutagenicity Inducing Substructures and Structure Activity Relationships of Noncongeneric Compounds","publication_year":2004,"publication_date":"2004-06-10","ids":{"openalex":"https://openalex.org/W2215622313","doi":"https://doi.org/10.1021/ci034254q","mag":"2215622313","pmid":"https://pubmed.ncbi.nlm.nih.gov/15272848"},"language":"en","primary_location":{"id":"doi:10.1021/ci034254q","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci034254q","pdf_url":null,"source":{"id":"https://openalex.org/S171559003","display_name":"Journal of Chemical Information and Computer Sciences","issn_l":"0095-2338","issn":["0095-2338","1520-5142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Computer Sciences","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://gateway.newisiknowledge.com/gateway/Gateway.cgi?GWVersion=2&amp;SrcAuth=CCC&amp;SrcApp=PRODUCT_NAME&amp;SrcURL=WOS_RETURN_URL&amp;CKEY=HELM1402040044JC&amp;DestLinkType=FullRecord&amp;DestApp=CCC&amp;SrcDesc=RETURN_ALT_TEXT&amp;SrcAppSID=APP_SID","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066862195","display_name":"Christoph Helma","orcid":"https://orcid.org/0000-0002-2640-798X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Christoph Helma","raw_affiliation_strings":["Institute for Computer Science, Machine Learning Lab, University Freiburg, Georges K\u00f6hler Allee 79, D-79110 Freiburg/Br., Germany, and Institute for Computer Science, Technical University Munich, Boltzmannstrasse 3, D-85748 Garching, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Computer Science, Machine Learning Lab, University Freiburg, Georges K\u00f6hler Allee 79, D-79110 Freiburg/Br., Germany, and Institute for Computer Science, Technical University Munich, Boltzmannstrasse 3, D-85748 Garching, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007993158","display_name":"Tobias Cramer","orcid":"https://orcid.org/0000-0002-5993-3388"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tobias Cramer","raw_affiliation_strings":["Institute for Computer Science, Machine Learning Lab, University Freiburg, Georges K\u00f6hler Allee 79, D-79110 Freiburg/Br., Germany, and Institute for Computer Science, Technical University Munich, Boltzmannstrasse 3, D-85748 Garching, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Computer Science, Machine Learning Lab, University Freiburg, Georges K\u00f6hler Allee 79, D-79110 Freiburg/Br., Germany, and Institute for Computer Science, Technical University Munich, Boltzmannstrasse 3, D-85748 Garching, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006247481","display_name":"Stefan Kr\u00e4mer","orcid":"https://orcid.org/0000-0002-0071-9344"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stefan Kramer","raw_affiliation_strings":["Institute for Computer Science, Machine Learning Lab, University Freiburg, Georges K\u00f6hler Allee 79, D-79110 Freiburg/Br., Germany, and Institute for Computer Science, Technical University Munich, Boltzmannstrasse 3, D-85748 Garching, Germany","University Freiburg"],"affiliations":[{"raw_affiliation_string":"Institute for Computer Science, Machine Learning Lab, University Freiburg, Georges K\u00f6hler Allee 79, D-79110 Freiburg/Br., Germany, and Institute for Computer Science, Technical University Munich, Boltzmannstrasse 3, D-85748 Garching, Germany","institution_ids":[]},{"raw_affiliation_string":"University Freiburg","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078136609","display_name":"Luc De Raedt","orcid":"https://orcid.org/0000-0002-6860-6303"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luc De Raedt","raw_affiliation_strings":["Institute for Computer Science, Machine Learning Lab, University Freiburg, Georges K\u00f6hler Allee 79, D-79110 Freiburg/Br., Germany, and Institute for Computer Science, Technical University Munich, Boltzmannstrasse 3, D-85748 Garching, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Computer Science, Machine Learning Lab, University Freiburg, Georges K\u00f6hler Allee 79, D-79110 Freiburg/Br., Germany, and Institute for Computer Science, Technical University Munich, Boltzmannstrasse 3, D-85748 Garching, Germany","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5066862195"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":10.708,"has_fulltext":false,"cited_by_count":244,"citation_normalized_percentile":{"value":0.98809588,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"44","issue":"4","first_page":"1402","last_page":"1411"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13180","display_name":"Chemistry and Chemical Engineering","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/2304","display_name":"Environmental Chemistry"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.7580174803733826},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6985198259353638},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6293864846229553},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.574639081954956},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5634621381759644},{"id":"https://openalex.org/keywords/usable","display_name":"USable","score":0.51082444190979},{"id":"https://openalex.org/keywords/molecular-descriptor","display_name":"Molecular descriptor","score":0.48721474409103394},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37410491704940796},{"id":"https://openalex.org/keywords/quantitative-structure\u2013activity-relationship","display_name":"Quantitative structure\u2013activity relationship","score":0.2987722158432007}],"concepts":[{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7580174803733826},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6985198259353638},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6293864846229553},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.574639081954956},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5634621381759644},{"id":"https://openalex.org/C2780615836","wikidata":"https://www.wikidata.org/wiki/Q2471869","display_name":"USable","level":2,"score":0.51082444190979},{"id":"https://openalex.org/C164923092","wikidata":"https://www.wikidata.org/wiki/Q3705921","display_name":"Molecular descriptor","level":3,"score":0.48721474409103394},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37410491704940796},{"id":"https://openalex.org/C164126121","wikidata":"https://www.wikidata.org/wiki/Q766383","display_name":"Quantitative structure\u2013activity relationship","level":2,"score":0.2987722158432007},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009152","descriptor_name":"Mutagenicity Tests","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D009152","descriptor_name":"Mutagenicity Tests","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D009152","descriptor_name":"Mutagenicity Tests","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D009153","descriptor_name":"Mutagens","qualifier_ui":"Q000633","qualifier_name":"toxicity","is_major_topic":false},{"descriptor_ui":"D009153","descriptor_name":"Mutagens","qualifier_ui":"Q000633","qualifier_name":"toxicity","is_major_topic":false},{"descriptor_ui":"D009153","descriptor_name":"Mutagens","qualifier_ui":"Q000633","qualifier_name":"toxicity","is_major_topic":false},{"descriptor_ui":"D009153","descriptor_name":"Mutagens","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D009153","descriptor_name":"Mutagens","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D009153","descriptor_name":"Mutagens","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D013329","descriptor_name":"Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013329","descriptor_name":"Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013329","descriptor_name":"Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":5,"locations":[{"id":"doi:10.1021/ci034254q","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci034254q","pdf_url":null,"source":{"id":"https://openalex.org/S171559003","display_name":"Journal of Chemical Information and Computer Sciences","issn_l":"0095-2338","issn":["0095-2338","1520-5142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Computer Sciences","raw_type":"journal-article"},{"id":"pmid:15272848","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/15272848","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and computer sciences","raw_type":null},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.411.7584","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.411.7584","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cbio.ensmp.fr/~jvert/svn/bibli/local/Helma2004Data.pdf","raw_type":"text"},{"id":"pmh:oai:cris.unibo.it:11585/566233","is_oa":false,"landing_page_url":"http://hdl.handle.net/11585/566233","pdf_url":null,"source":{"id":"https://openalex.org/S4306402579","display_name":"Archivio istituzionale della ricerca (Alma Mater Studiorum Universit\u00e0 di Bologna)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210117483","host_organization_name":"Istituto di Ematologia di Bologna","host_organization_lineage":["https://openalex.org/I4210117483"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:lirias2repo.kuleuven.be:123456789/124533","is_oa":true,"landing_page_url":"http://gateway.newisiknowledge.com/gateway/Gateway.cgi?GWVersion=2&amp;SrcAuth=CCC&amp;SrcApp=PRODUCT_NAME&amp;SrcURL=WOS_RETURN_URL&amp;CKEY=HELM1402040044JC&amp;DestLinkType=FullRecord&amp;DestApp=CCC&amp;SrcDesc=RETURN_ALT_TEXT&amp;SrcAppSID=APP_SID","pdf_url":null,"source":{"id":"https://openalex.org/S4306401954","display_name":"Lirias (KU Leuven)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99464096","host_organization_name":"KU Leuven","host_organization_lineage":["https://openalex.org/I99464096"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of chemical information and computer sciences, vol. 44 (4), (1402-1411)","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:lirias2repo.kuleuven.be:123456789/124533","is_oa":true,"landing_page_url":"http://gateway.newisiknowledge.com/gateway/Gateway.cgi?GWVersion=2&amp;SrcAuth=CCC&amp;SrcApp=PRODUCT_NAME&amp;SrcURL=WOS_RETURN_URL&amp;CKEY=HELM1402040044JC&amp;DestLinkType=FullRecord&amp;DestApp=CCC&amp;SrcDesc=RETURN_ALT_TEXT&amp;SrcAppSID=APP_SID","pdf_url":null,"source":{"id":"https://openalex.org/S4306401954","display_name":"Lirias (KU Leuven)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99464096","host_organization_name":"KU Leuven","host_organization_lineage":["https://openalex.org/I99464096"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of chemical information and computer sciences, vol. 44 (4), (1402-1411)","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"score":0.5699999928474426,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W96200431","https://openalex.org/W1528113134","https://openalex.org/W1533958480","https://openalex.org/W1545231783","https://openalex.org/W1966785965","https://openalex.org/W1970202530","https://openalex.org/W1975147762","https://openalex.org/W1984859898","https://openalex.org/W1985104873","https://openalex.org/W1985459162","https://openalex.org/W2004678494","https://openalex.org/W2008764714","https://openalex.org/W2014123957","https://openalex.org/W2015073280","https://openalex.org/W2018636731","https://openalex.org/W2021045190","https://openalex.org/W2023946205","https://openalex.org/W2051670380","https://openalex.org/W2051718455","https://openalex.org/W2058581741","https://openalex.org/W2060953898","https://openalex.org/W2090382722","https://openalex.org/W2125055259","https://openalex.org/W2156909104","https://openalex.org/W2966207845"],"related_works":["https://openalex.org/W2982321410","https://openalex.org/W95465806","https://openalex.org/W2392004567","https://openalex.org/W2940029036","https://openalex.org/W2756595502","https://openalex.org/W2010789764","https://openalex.org/W2187233292","https://openalex.org/W2219281195","https://openalex.org/W4389422031","https://openalex.org/W2532162280"],"abstract_inverted_index":{"This":[0],"paper":[1],"explores":[2],"the":[3,13,33,55,66,79,111,115,120,129,133,136],"utility":[4],"of":[5,15,35,57,78,85,105],"data":[6,22],"mining":[7],"and":[8,48,73,124,163],"machine":[9,51,117],"learning":[10,52,118,137],"algorithms":[11,53,138],"for":[12,32,39,54,70,152,165,170],"induction":[14,56],"mutagenicity":[16],"structure-activity":[17],"relationships":[18],"(SARs)":[19],"from":[20,59],"noncongeneric":[21,40],"sets.":[23],"We":[24,142],"compare":[25],"(i)":[26],"a":[27],"newly":[28],"developed":[29],"algorithm":[30],"(MOLFEA)":[31],"generation":[34],"descriptors":[36,90,106],"(molecular":[37,46],"fragments)":[38],"compounds":[41],"with":[42],"traditional":[43],"SAR":[44],"approaches":[45],"properties)":[47],"(ii)":[49],"different":[50],"SARs":[58],"these":[60,71],"descriptors.":[61],"In":[62],"addition":[63],"we":[64],"investigate":[65],"optimal":[67],"parameter":[68],"settings":[69],"programs":[72],"give":[74],"an":[75],"exemplary":[76],"interpretation":[77],"derived":[80,89,112],"models.":[81,113],"The":[82,155],"predictive":[83,147,166],"accuracies":[84,148],"models":[86,157],"using":[87,98],"MOLFEA":[88],"is":[91],"approximately":[92],"10-15%age":[93],"points":[94],"higher":[95],"than":[96],"those":[97],"molecular":[99],"properties":[100],"alone.":[101],"Using":[102],"both":[103],"types":[104],"together":[107],"does":[108],"not":[109],"improve":[110],"From":[114],"applied":[116],"techniques":[119],"rule":[121],"learner":[122],"PART":[123],"support":[125],"vector":[126],"machines":[127],"gave":[128],"best":[130],"results,":[131],"although":[132],"differences":[134],"between":[135],"are":[139,158],"only":[140],"marginal.":[141],"were":[143],"able":[144],"to":[145,150,161],"achieve":[146],"up":[149],"78%":[151],"10-fold":[153],"cross-validation.":[154],"resulting":[156],"relatively":[159],"easy":[160],"interpret":[162],"usable":[164],"as":[167,169],"well":[168],"explanatory":[171],"purposes.":[172]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":32},{"year":2021,"cited_by_count":38},{"year":2020,"cited_by_count":12},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":11},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":13},{"year":2015,"cited_by_count":10},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":11}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
