{"id":"https://openalex.org/W3171856801","doi":"https://doi.org/10.1145/3447548.3467278","title":"Probabilistic Gradient Boosting Machines for Large-Scale Probabilistic Regression","display_name":"Probabilistic Gradient Boosting Machines for Large-Scale Probabilistic Regression","publication_year":2021,"publication_date":"2021-08-12","ids":{"openalex":"https://openalex.org/W3171856801","doi":"https://doi.org/10.1145/3447548.3467278","mag":"3171856801"},"language":"en","primary_location":{"id":"doi:10.1145/3447548.3467278","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3447548.3467278","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.01682","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078355434","display_name":"Olivier Sprangers","orcid":"https://orcid.org/0000-0002-0533-4574"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]},{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Olivier Sprangers","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090934117","display_name":"Sebastian Schelter","orcid":"https://orcid.org/0000-0003-4722-5840"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Sebastian Schelter","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031439294","display_name":"Maarten de Rijke","orcid":"https://orcid.org/0000-0002-1086-0202"},"institutions":[{"id":"https://openalex.org/I4210112722","display_name":"Ahold Delhaize (Netherlands)","ror":"https://ror.org/01v6p2g18","country_code":"NL","type":"company","lineage":["https://openalex.org/I4210112722"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Maarten de Rijke","raw_affiliation_strings":["University of Amsterdam &amp; Ahold Delhaize, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam &amp; Ahold Delhaize, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210112722","https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5078355434"],"corresponding_institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":3.2199,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.93114465,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1510","last_page":"1520"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11918","display_name":"Forecasting Techniques and Applications","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.8231087923049927},{"id":"https://openalex.org/keywords/gradient-boosting","display_name":"Gradient boosting","score":0.6936302185058594},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6742632985115051},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.6657622456550598},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.5618128776550293},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47012460231781006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4675421118736267},{"id":"https://openalex.org/keywords/probabilistic-relevance-model","display_name":"Probabilistic relevance model","score":0.46477511525154114},{"id":"https://openalex.org/keywords/probabilistic-forecasting","display_name":"Probabilistic forecasting","score":0.4634983241558075},{"id":"https://openalex.org/keywords/probabilistic-classification","display_name":"Probabilistic classification","score":0.44838476181030273},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.447252094745636},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.4275045394897461},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.4264239966869354},{"id":"https://openalex.org/keywords/probabilistic-analysis-of-algorithms","display_name":"Probabilistic analysis of algorithms","score":0.40892481803894043},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.14325708150863647}],"concepts":[{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.8231087923049927},{"id":"https://openalex.org/C70153297","wikidata":"https://www.wikidata.org/wiki/Q5591907","display_name":"Gradient boosting","level":3,"score":0.6936302185058594},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6742632985115051},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.6657622456550598},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.5618128776550293},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47012460231781006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4675421118736267},{"id":"https://openalex.org/C143017306","wikidata":"https://www.wikidata.org/wiki/Q3318133","display_name":"Probabilistic relevance model","level":4,"score":0.46477511525154114},{"id":"https://openalex.org/C122282355","wikidata":"https://www.wikidata.org/wiki/Q7246855","display_name":"Probabilistic forecasting","level":3,"score":0.4634983241558075},{"id":"https://openalex.org/C189119545","wikidata":"https://www.wikidata.org/wiki/Q5128022","display_name":"Probabilistic classification","level":4,"score":0.44838476181030273},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.447252094745636},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.4275045394897461},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.4264239966869354},{"id":"https://openalex.org/C24404364","wikidata":"https://www.wikidata.org/wiki/Q7246846","display_name":"Probabilistic analysis of algorithms","level":3,"score":0.40892481803894043},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.14325708150863647},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3447548.3467278","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3447548.3467278","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/5b2a1d6f-55a8-42f8-b4d3-e0bb6ab9c018","is_oa":false,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/probabilistic-gradient-boosting-machines-for-largescale-probabilistic-regression(5b2a1d6f-55a8-42f8-b4d3-e0bb6ab9c018).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sprangers, O, Schelter, S & de Rijke, M 2021, Probabilistic Gradient Boosting Machines for Large-Scale Probabilistic Regression. in KDD \u201921 : Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining : August 14-18, 2021, virtual event, Singapore. Association for Computing Machinery, New York, NY, pp. 1510-1520, 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Singapore, Singapore, 14/08/21. https://doi.org/10.1145/3447548.3467278","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:2106.01682","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.01682","pdf_url":"https://arxiv.org/pdf/2106.01682","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:uvapub:oai:dare.uva.nl:publications/5b2a1d6f-55a8-42f8-b4d3-e0bb6ab9c018","is_oa":false,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/probabilistic-gradient-boosting-machines-for-largescale-probabilistic-regression(5b2a1d6f-55a8-42f8-b4d3-e0bb6ab9c018).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"KDD \u201921: Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining : August 14-18, 2021, virtual event, Singapore, 1510 - 1520","raw_type":"info:eu-repo/semantics/conferencepaper"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.01682","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.01682","pdf_url":"https://arxiv.org/pdf/2106.01682","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6000000238418579}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1573647811","https://openalex.org/W1678356000","https://openalex.org/W2016287239","https://openalex.org/W2126292488","https://openalex.org/W2154065358","https://openalex.org/W2261958379","https://openalex.org/W2295598076","https://openalex.org/W2747599906","https://openalex.org/W2753069234","https://openalex.org/W2768348081","https://openalex.org/W2769752211","https://openalex.org/W2811507150","https://openalex.org/W2954731415","https://openalex.org/W2970309699","https://openalex.org/W2970631142","https://openalex.org/W2970971581","https://openalex.org/W2979179852","https://openalex.org/W2980994438","https://openalex.org/W2986988027","https://openalex.org/W2996331899","https://openalex.org/W3000577762","https://openalex.org/W3035562930","https://openalex.org/W3042623101","https://openalex.org/W3094138326","https://openalex.org/W3099006712","https://openalex.org/W3102476541","https://openalex.org/W3171884590","https://openalex.org/W4200370185","https://openalex.org/W4206173445","https://openalex.org/W4246587917","https://openalex.org/W4295312788","https://openalex.org/W4301184128"],"related_works":["https://openalex.org/W2312482938","https://openalex.org/W1588585209","https://openalex.org/W4319159632","https://openalex.org/W3083111399","https://openalex.org/W2979812796","https://openalex.org/W3035562930","https://openalex.org/W2592036976","https://openalex.org/W2944327260","https://openalex.org/W2494523064","https://openalex.org/W2769304616"],"abstract_inverted_index":{"Gradient":[0,66],"Boosting":[1,67],"Machines":[2,68],"(GBM)":[3],"are":[4,14],"hugely":[5],"popular":[6],"for":[7,60],"solving":[8],"tabular":[9],"data":[10],"problems.":[11],"However,":[12],"practitioners":[13],"not":[15],"only":[16,168],"interested":[17],"in":[18,23,26,83,93,110,155,200,220,229],"point":[19,153,221],"predictions,":[20],"but":[21],"also":[22],"probabilistic":[24,37,74,148,162,198,230],"predictions":[25,38,75],"order":[27],"to":[28,57,72,124,141,181,217,226],"quantify":[29],"the":[30,33,90,103,136],"uncertainty":[31],"of":[32,80,107,138,179,184],"predictions.":[34],"Creating":[35],"such":[36,207],"is":[39],"difficult":[40],"with":[41,76,202],"existing":[42,142,187],"GBM-based":[43],"solutions:":[44],"they":[45,52],"either":[46],"require":[47],"training":[48],"multiple":[49],"models":[50],"or":[51],"become":[53],"too":[54],"computationally":[55,85],"expensive":[56],"be":[58],"useful":[59],"large-scale":[61],"settings.":[62],"We":[63,133],"propose":[64],"Probabilistic":[65],"(PGBM),":[69],"a":[70,77,84,94,98,111,128,156,165,177],"method":[71],"create":[73],"single":[78,157,166],"ensemble":[79,116],"decision":[81,95],"trees":[82],"efficient":[86],"manner.":[87],"PGBM":[88,139,146,160,195],"approximates":[89,102],"leaf":[91],"weights":[92],"tree":[96,115],"as":[97,208],"random":[99],"variable,":[100],"and":[101,105,174,193,224],"mean":[104],"variance":[106],"each":[108],"sample":[109,126],"dataset":[112],"via":[113,164],"stochastic":[114],"update":[117],"equations.":[118],"These":[119],"learned":[120],"moments":[121],"allow":[122],"us":[123],"subsequently":[125],"from":[127],"specified":[129],"distribution":[130],"after":[131],"training.":[132],"empirically":[134],"demonstrate":[135],"advantages":[137],"compared":[140],"state-of-the-art":[143,188],"methods:":[144],"(i)":[145],"enables":[147],"estimates":[149,163,199],"without":[150,170],"compromising":[151],"on":[152,190],"performance":[154,223],"model,":[158],"(ii)":[159],"learns":[161],"model":[167],"(and":[169],"requiring":[171],"multi-parameter":[172],"boosting),":[173],"thereby":[175],"offers":[176],"speedup":[178],"up":[180,216,225],"several":[182],"orders":[183],"magnitude":[185],"over":[186],"methods":[189],"large":[191],"datasets,":[192],"(iii)":[194],"achieves":[196],"accurate":[197],"tasks":[201],"complex":[203],"differentiable":[204],"loss":[205],"functions,":[206],"hierarchical":[209],"time":[210],"series":[211],"problems,":[212],"where":[213],"we":[214],"observed":[215],"10%":[218],"improvement":[219,228],"forecasting":[222,231],"300%":[227],"performance.":[232]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":3}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2021-06-22T00:00:00"}
