{"id":"https://openalex.org/W4386221938","doi":"https://doi.org/10.1186/s13321-023-00743-7","title":"Practical guidelines for the use of gradient boosting for molecular property prediction","display_name":"Practical guidelines for the use of gradient boosting for molecular property prediction","publication_year":2023,"publication_date":"2023-08-28","ids":{"openalex":"https://openalex.org/W4386221938","doi":"https://doi.org/10.1186/s13321-023-00743-7","pmid":"https://pubmed.ncbi.nlm.nih.gov/37641120"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-023-00743-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00743-7","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00743-7","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00743-7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031395316","display_name":"Davide Boldini","orcid":"https://orcid.org/0000-0002-4109-3528"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Davide Boldini","raw_affiliation_strings":["Department of Bioscience, Center for Functional Protein Assemblies (CPA), Technical University of Munich, Garching bei Munich, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Bioscience, Center for Functional Protein Assemblies (CPA), Technical University of Munich, Garching bei Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078946433","display_name":"Francesca Grisoni","orcid":"https://orcid.org/0000-0001-8552-6615"},"institutions":[{"id":"https://openalex.org/I83019370","display_name":"Eindhoven University of Technology","ror":"https://ror.org/02c2kyt77","country_code":"NL","type":"education","lineage":["https://openalex.org/I83019370"]},{"id":"https://openalex.org/I173063890","display_name":"University of Applied Sciences Utrecht","ror":"https://ror.org/028z9kw20","country_code":"NL","type":"education","lineage":["https://openalex.org/I173063890"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Francesca Grisoni","raw_affiliation_strings":["Centre for Living Technologies, Alliance TU/E, WUR, UU, UMC Utrecht, Utrecht, The Netherlands","Department of Biomedical Engineering, Institute for Complex Molecular Sciences, Eindhoven University of Technology, Eindhoven, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Living Technologies, Alliance TU/E, WUR, UU, UMC Utrecht, Utrecht, The Netherlands","institution_ids":["https://openalex.org/I173063890"]},{"raw_affiliation_string":"Department of Biomedical Engineering, Institute for Complex Molecular Sciences, Eindhoven University of Technology, Eindhoven, The Netherlands","institution_ids":["https://openalex.org/I83019370"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078316788","display_name":"Daniel K\u00fchn","orcid":"https://orcid.org/0000-0002-0550-4174"},"institutions":[{"id":"https://openalex.org/I164928964","display_name":"Merck (Germany)","ror":"https://ror.org/04b2dty93","country_code":"DE","type":"company","lineage":["https://openalex.org/I164928964"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Daniel Kuhn","raw_affiliation_strings":["Merck Healthcare KGaA, Darmstadt, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Merck Healthcare KGaA, Darmstadt, Germany","institution_ids":["https://openalex.org/I164928964"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090953004","display_name":"Lukas Friedrich","orcid":"https://orcid.org/0000-0001-6190-5418"},"institutions":[{"id":"https://openalex.org/I164928964","display_name":"Merck (Germany)","ror":"https://ror.org/04b2dty93","country_code":"DE","type":"company","lineage":["https://openalex.org/I164928964"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lukas Friedrich","raw_affiliation_strings":["Merck Healthcare KGaA, Darmstadt, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Merck Healthcare KGaA, Darmstadt, Germany","institution_ids":["https://openalex.org/I164928964"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048928672","display_name":"Stephan A. Sieber","orcid":"https://orcid.org/0000-0002-9400-906X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stephan A. Sieber","raw_affiliation_strings":["Department of Bioscience, Center for Functional Protein Assemblies (CPA), Technical University of Munich, Garching bei Munich, Germany. stephan.sieber@tum.de","Department of Bioscience, Center for Functional Protein Assemblies (CPA), Technical University of Munich, Garching bei Munich, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Bioscience, Center for Functional Protein Assemblies (CPA), Technical University of Munich, Garching bei Munich, Germany. stephan.sieber@tum.de","institution_ids":[]},{"raw_affiliation_string":"Department of Bioscience, Center for Functional Protein Assemblies (CPA), Technical University of Munich, Garching bei Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5031395316"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1704,"currency":"EUR","value_usd":1837},"fwci":22.3266,"has_fulltext":true,"cited_by_count":115,"citation_normalized_percentile":{"value":0.99653241,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"15","issue":"1","first_page":"73","last_page":"73"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10908","display_name":"Analytical Chemistry and Chromatography","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.6605002880096436},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6125068068504333},{"id":"https://openalex.org/keywords/gradient-boosting","display_name":"Gradient boosting","score":0.5775702595710754},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.5275499224662781},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4278722107410431},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.42036929726600647},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.344113826751709},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3357306122779846},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.09841227531433105}],"concepts":[{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.6605002880096436},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6125068068504333},{"id":"https://openalex.org/C70153297","wikidata":"https://www.wikidata.org/wiki/Q5591907","display_name":"Gradient boosting","level":3,"score":0.5775702595710754},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.5275499224662781},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4278722107410431},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42036929726600647},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.344113826751709},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3357306122779846},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.09841227531433105},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13321-023-00743-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00743-7","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00743-7","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:37641120","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37641120","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:10464382","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/10464382","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC10464382/pdf/13321_2023_Article_743.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:ef3fe130feb445e3a9edaf794e2c3fb1","is_oa":true,"landing_page_url":"https://doaj.org/article/ef3fe130feb445e3a9edaf794e2c3fb1","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 15, Iss 1, Pp 1-13 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13321-023-00743-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00743-7","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00743-7","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5199999809265137,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320311647","display_name":"Merck KGaA","ror":"https://ror.org/04b2dty93"},{"id":"https://openalex.org/F4320323383","display_name":"Technische Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/02kkvpp62"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4386221938.pdf"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W1437335841","https://openalex.org/W1545231783","https://openalex.org/W1678356000","https://openalex.org/W2101234009","https://openalex.org/W2200017991","https://openalex.org/W2216946510","https://openalex.org/W2295598076","https://openalex.org/W2332896222","https://openalex.org/W2594183968","https://openalex.org/W2803206732","https://openalex.org/W2806547269","https://openalex.org/W2890097032","https://openalex.org/W2901476322","https://openalex.org/W2904029397","https://openalex.org/W2914484650","https://openalex.org/W2916327454","https://openalex.org/W2966357564","https://openalex.org/W2976332861","https://openalex.org/W2982583112","https://openalex.org/W2987201163","https://openalex.org/W2999615587","https://openalex.org/W3000478925","https://openalex.org/W3007309629","https://openalex.org/W3023204497","https://openalex.org/W3023211159","https://openalex.org/W3041286284","https://openalex.org/W3081125651","https://openalex.org/W3093687066","https://openalex.org/W3094280424","https://openalex.org/W3109675364","https://openalex.org/W3116202926","https://openalex.org/W3126326305","https://openalex.org/W3127521151","https://openalex.org/W3154258817","https://openalex.org/W3165195325","https://openalex.org/W3170120958","https://openalex.org/W3188673779","https://openalex.org/W3198664897","https://openalex.org/W3210755349","https://openalex.org/W3216660278","https://openalex.org/W4220904544","https://openalex.org/W4239510810","https://openalex.org/W4248855359","https://openalex.org/W4281671019","https://openalex.org/W4294365377","https://openalex.org/W4297957988","https://openalex.org/W4306294757","https://openalex.org/W4308699322","https://openalex.org/W4310603653","https://openalex.org/W6602163396"],"related_works":["https://openalex.org/W2967733078","https://openalex.org/W3204430031","https://openalex.org/W3137904399","https://openalex.org/W4310492845","https://openalex.org/W2885778889","https://openalex.org/W2766514146","https://openalex.org/W2885516856","https://openalex.org/W4289703016","https://openalex.org/W4310224730","https://openalex.org/W1985505753"],"abstract_inverted_index":{"Decision":[0],"tree":[1,135],"ensembles":[2],"are":[3],"among":[4],"the":[5,50,61,101,108,121,155,178,186],"most":[6,51],"robust,":[7],"high-performing":[8],"and":[9,40,56,85,133,164,198,206],"computationally":[10],"efficient":[11],"machine":[12],"learning":[13],"approaches":[14,67],"for":[15,30,68,113,191,203],"quantitative":[16],"structure-activity":[17],"relationship":[18],"(QSAR)":[19],"modeling.":[20],"Among":[21],"them,":[22],"gradient":[23,47,76,200],"boosting":[24,48,77,201],"has":[25],"recently":[26],"garnered":[27],"particular":[28],"attention,":[29],"its":[31],"performance":[32],"in":[33,92,130],"data":[34],"science":[35],"competitions,":[36],"virtual":[37,204],"screening":[38,205],"campaigns,":[39],"bioactivity":[41],"prediction.":[42],"However,":[43],"different":[44],"variants":[45],"of":[46,65,118,148,157,189],"exist,":[49],"popular":[52],"being":[53],"XGBoost,":[54],"LightGBM":[55,106],"CatBoost.":[57],"Our":[58,94],"study":[59,184],"provides":[60,185],"first":[62,187],"comprehensive":[63],"comparison":[64],"these":[66],"QSAR.":[69],"To":[70],"this":[71],"end,":[72],"we":[73],"trained":[74],"157,590":[75],"models,":[78],"which":[79],"were":[80],"evaluated":[81],"on":[82],"16":[83],"datasets":[84,163],"94":[86],"endpoints,":[87],"comprising":[88],"1.4":[89],"million":[90],"compounds":[91],"total.":[93],"results":[95,152],"show":[96,153],"that":[97,154,165],"XGBoost":[98],"generally":[99],"achieves":[100],"best":[102],"predictive":[103,179],"performance,":[104],"while":[105],"requires":[107],"least":[109],"training":[110],"time,":[111],"especially":[112],"larger":[114],"datasets.":[115],"In":[116,181],"terms":[117],"feature":[119],"importance,":[120],"models":[122,202],"surprisingly":[123],"rank":[124],"molecular":[125],"features":[126],"differently,":[127],"reflecting":[128],"differences":[129],"regularization":[131],"techniques":[132],"decision":[134],"structures.":[136],"Thus,":[137],"expert":[138],"knowledge":[139],"must":[140],"always":[141],"be":[142],"employed":[143],"when":[144],"evaluating":[145],"data-driven":[146],"explanations":[147],"bioactivity.":[149],"Furthermore,":[150],"our":[151,183],"relevance":[156],"each":[158],"hyperparameter":[159],"varies":[160],"greatly":[161],"across":[162],"it":[166],"is":[167],"crucial":[168],"to":[169,176,194],"optimize":[170,197],"as":[171,174],"many":[172],"hyperparameters":[173],"possible":[175],"maximize":[177],"performance.":[180],"conclusion,":[182],"set":[188],"guidelines":[190],"cheminformatics":[192],"practitioners":[193],"effectively":[195],"train,":[196],"evaluate":[199],"QSAR":[207],"applications.":[208]},"counts_by_year":[{"year":2026,"cited_by_count":29},{"year":2025,"cited_by_count":65},{"year":2024,"cited_by_count":20},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-02T08:42:23.175194","created_date":"2025-10-10T00:00:00"}
