{"id":"https://openalex.org/W2948000013","doi":"https://doi.org/10.1109/tpds.2019.2920131","title":"Exploiting GPUs for Efficient Gradient Boosting Decision Tree Training","display_name":"Exploiting GPUs for Efficient Gradient Boosting Decision Tree Training","publication_year":2019,"publication_date":"2019-05-31","ids":{"openalex":"https://openalex.org/W2948000013","doi":"https://doi.org/10.1109/tpds.2019.2920131","mag":"2948000013"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2019.2920131","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2019.2920131","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013127195","display_name":"Zeyi Wen","orcid":"https://orcid.org/0000-0003-3370-6053"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Zeyi Wen","raw_affiliation_strings":["SoC, National University of Singapore, Singapore","SOC, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"SoC, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"SOC, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029141857","display_name":"Jiashuai Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiashuai Shi","raw_affiliation_strings":["South China University of Technology, Guangzhou, China","[South China University of Technology, Guangzhou, GuangDong, China]"],"affiliations":[{"raw_affiliation_string":"South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]},{"raw_affiliation_string":"[South China University of Technology, Guangzhou, GuangDong, China]","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039946576","display_name":"Bingsheng He","orcid":"https://orcid.org/0000-0001-8618-4581"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Bingsheng He","raw_affiliation_strings":["SoC, National University of Singapore, Singapore","SOC, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"SoC, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"SOC, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100326501","display_name":"Jian Chen","orcid":"https://orcid.org/0000-0003-4769-1526"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Chen","raw_affiliation_strings":["South China University of Technology, Guangzhou, China","[South China University of Technology, Guangzhou, GuangDong, China]"],"affiliations":[{"raw_affiliation_string":"South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]},{"raw_affiliation_string":"[South China University of Technology, Guangzhou, GuangDong, China]","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069895484","display_name":"Kotagiri Ramamohanarao","orcid":"https://orcid.org/0000-0003-3304-9268"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Kotagiri Ramamohanarao","raw_affiliation_strings":["The University of Melbourne, Parkville, Australia","The University of Melbourne, Parkville, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne, Parkville, Australia","institution_ids":["https://openalex.org/I165779595"]},{"raw_affiliation_string":"The University of Melbourne, Parkville, VIC, Australia","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055253913","display_name":"Qinbin Li","orcid":"https://orcid.org/0000-0002-6539-6443"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Qinbin Li","raw_affiliation_strings":["SoC, National University of Singapore, Singapore","SOC, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"SoC, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"SOC, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5013127195"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":3.9204,"has_fulltext":false,"cited_by_count":57,"citation_normalized_percentile":{"value":0.94921888,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"30","issue":"12","first_page":"2706","last_page":"2717"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8863489627838135},{"id":"https://openalex.org/keywords/histogram","display_name":"Histogram","score":0.5978770852088928},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.5694020986557007},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5654365420341492},{"id":"https://openalex.org/keywords/workstation","display_name":"Workstation","score":0.539397656917572},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5313131809234619},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.49088698625564575},{"id":"https://openalex.org/keywords/gradient-boosting","display_name":"Gradient boosting","score":0.46721023321151733},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4436030089855194},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.43065890669822693},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.3944041430950165},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39294132590293884},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38423117995262146},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.20455875992774963},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.132289320230484},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.11701041460037231}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8863489627838135},{"id":"https://openalex.org/C53533937","wikidata":"https://www.wikidata.org/wiki/Q185020","display_name":"Histogram","level":3,"score":0.5978770852088928},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.5694020986557007},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5654365420341492},{"id":"https://openalex.org/C67953723","wikidata":"https://www.wikidata.org/wiki/Q192525","display_name":"Workstation","level":2,"score":0.539397656917572},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5313131809234619},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.49088698625564575},{"id":"https://openalex.org/C70153297","wikidata":"https://www.wikidata.org/wiki/Q5591907","display_name":"Gradient boosting","level":3,"score":0.46721023321151733},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4436030089855194},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.43065890669822693},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.3944041430950165},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39294132590293884},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38423117995262146},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.20455875992774963},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.132289320230484},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.11701041460037231},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpds.2019.2920131","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2019.2920131","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-161595","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-161595","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W87810382","https://openalex.org/W181030707","https://openalex.org/W273955616","https://openalex.org/W1480958225","https://openalex.org/W1587157779","https://openalex.org/W1678356000","https://openalex.org/W1986303814","https://openalex.org/W1987356990","https://openalex.org/W2014184229","https://openalex.org/W2019397928","https://openalex.org/W2033549992","https://openalex.org/W2050277572","https://openalex.org/W2066860750","https://openalex.org/W2083575119","https://openalex.org/W2112452856","https://openalex.org/W2115613939","https://openalex.org/W2120391124","https://openalex.org/W2125816831","https://openalex.org/W2128653745","https://openalex.org/W2132083787","https://openalex.org/W2149937348","https://openalex.org/W2152902270","https://openalex.org/W2161616775","https://openalex.org/W2162741763","https://openalex.org/W2167865917","https://openalex.org/W2295598076","https://openalex.org/W2468793326","https://openalex.org/W2604808181","https://openalex.org/W2740731087","https://openalex.org/W2768348081","https://openalex.org/W2782213427","https://openalex.org/W2886958107","https://openalex.org/W2964022491","https://openalex.org/W3004286518","https://openalex.org/W3102476541","https://openalex.org/W4251164127","https://openalex.org/W6610017368","https://openalex.org/W6677732584","https://openalex.org/W6741642434","https://openalex.org/W6745609711","https://openalex.org/W6747652377","https://openalex.org/W6750729320","https://openalex.org/W6832800064"],"related_works":["https://openalex.org/W2967733078","https://openalex.org/W1963859303","https://openalex.org/W3204430031","https://openalex.org/W2364044215","https://openalex.org/W3137904399","https://openalex.org/W4310492845","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2885778889"],"abstract_inverted_index":{"In":[0,216],"this":[1],"paper,":[2],"we":[3,104],"present":[4],"a":[5,208],"novel":[6,107],"parallel":[7,93],"implementation":[8],"for":[9,168],"training":[10,166],"Gradient":[11],"Boosting":[12],"Decision":[13],"Trees":[14],"(GBDTs)":[15],"on":[16,26,102,123,207,221,245,256],"Graphics":[17],"Processing":[18],"Units":[19],"(GPUs).":[20],"Thanks":[21],"to":[22,70,158,177,252,276],"the":[23,29,150,198,219,230,239,242,257,262,269],"excellent":[24],"results":[25,167,185],"classification/regression":[27],"and":[28,44,51,90,114,126,128,154,172,204,281,290],"open":[30],"sourced":[31],"libraries":[32,200,220,231,244],"such":[33],"as":[34,285],"XGBoost,":[35,202,280],"GBDTs":[36],"have":[37,57,282],"become":[38,232],"very":[39],"popular":[40],"in":[41,48,61,95,134],"recent":[42],"years":[43],"won":[45],"many":[46,63,84],"awards":[47],"machine":[49,64],"learning":[50,65],"data":[52,92,120,180,240],"mining":[53],"competitions.":[54],"Although":[55],"GPUs":[56,176,246],"demonstrated":[58],"their":[59],"success":[60],"accelerating":[62],"applications,":[66],"it":[67],"is":[68],"challenging":[69],"develop":[71],"an":[72],"efficient":[73,130,169],"GPU-based":[74],"GBDT":[75],"algorithm.":[76],"The":[77],"key":[78],"challenges":[79,101],"include":[80],"irregular":[81],"memory":[82,129],"accesses,":[83],"sorting":[85],"operations":[86],"with":[87,149,218],"small":[88],"inputs":[89],"varying":[91],"granularities":[94],"tree":[96],"construction.":[97],"To":[98],"tackle":[99],"these":[100],"GPUs,":[103,222],"propose":[105],"various":[106],"techniques":[108],"including":[109],"(i)":[110],"Run-length":[111],"Encoding":[112],"compression":[113],"thread/block":[115],"workload":[116],"dynamic":[117],"allocation,":[118],"(ii)":[119],"partitioning":[121],"based":[122],"stable":[124],"sort,":[125],"fast":[127],"attribute":[131],"ID":[132],"lookup":[133],"node":[135],"splitting,":[136],"(iii)":[137],"finding":[138],"approximate":[139],"split":[140],"points":[141],"using":[142],"two-stage":[143],"histogram":[144,156,160],"building,":[145],"(iv)":[146],"building":[147,161],"histograms":[148],"aware":[151],"of":[152,212,264],"sparsity":[153],"exploiting":[155,174],"subtraction":[157],"reduce":[159],"workload,":[162],"(v)":[163],"reusing":[164],"intermediate":[165],"gradient":[170],"computation,":[171],"(vi)":[173],"multiple":[175],"handle":[178,225],"larger":[179],"sets":[181,241],"efficiently.":[182],"Our":[183],"experimental":[184],"show":[186],"that":[187],"our":[188,265],"algorithm":[189],"named":[190],"ThunderGBM":[191,223,249,273],"can":[192,224,247],"be":[193],"10x":[194],"times":[195,254],"faster":[196],"than":[197],"state-of-the-art":[199],"(i.e.,":[201],"LightGBM":[203,289],"CatBoost)":[205],"running":[206],"relatively":[209],"high-end":[210],"workstation":[211],"20":[213],"CPU":[214],"cores.":[215],"comparison":[217],"higher":[226],"dimensional":[227],"problems":[228],"which":[229,260],"extremely":[233],"slow":[234],"or":[235],"simply":[236],"fail.":[237],"For":[238],"existing":[243],"handle,":[248],"achieves":[250],"up":[251],"10":[253],"speedup":[255],"same":[258],"hardware,":[259],"demonstrates":[261],"significance":[263],"GPU":[266],"optimizations.":[267],"Moreover,":[268],"models":[270],"trained":[271,278,287],"by":[272,279,288],"are":[274],"identical":[275],"those":[277,286],"similar":[283],"quality":[284],"CatBoost.":[291]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
