{"id":"https://openalex.org/W7123361955","doi":"https://doi.org/10.1109/access.2026.3651790","title":"Sparse Probabilistic Splits for Tree Ensembles: A Tunable Regularizer for Stable Generalization on Tabular Data","display_name":"Sparse Probabilistic Splits for Tree Ensembles: A Tunable Regularizer for Stable Generalization on Tabular Data","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7123361955","doi":"https://doi.org/10.1109/access.2026.3651790"},"language":null,"primary_location":{"id":"doi:10.1109/access.2026.3651790","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3651790","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3651790","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048322657","display_name":"Jae Hyung Choi","orcid":"https://orcid.org/0000-0003-1173-2787"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jongkwan Choi","raw_affiliation_strings":["Department of Industrial Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Industrial Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027140824","display_name":"YongKeun Lee","orcid":"https://orcid.org/0000-0003-0789-8354"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yujin Lee","raw_affiliation_strings":["Department of Industrial Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Industrial Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122884069","display_name":"Eunbin Yun","orcid":null},"institutions":[{"id":"https://openalex.org/I12832649","display_name":"Gachon University","ror":"https://ror.org/03ryywt80","country_code":"KR","type":"education","lineage":["https://openalex.org/I12832649"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Eunbin Yun","raw_affiliation_strings":["Department of Mechanical, Smart, and Industrial Engineering, Gachon University, Seongnam, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical, Smart, and Industrial Engineering, Gachon University, Seongnam, South Korea","institution_ids":["https://openalex.org/I12832649"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122862769","display_name":"Gyeongtaek Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I12832649","display_name":"Gachon University","ror":"https://ror.org/03ryywt80","country_code":"KR","type":"education","lineage":["https://openalex.org/I12832649"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Gyeongtaek Lee","raw_affiliation_strings":["Department of Mechanical, Smart, and Industrial Engineering, Gachon University, Seongnam, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical, Smart, and Industrial Engineering, Gachon University, Seongnam, South Korea","institution_ids":["https://openalex.org/I12832649"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5048322657"],"corresponding_institution_ids":["https://openalex.org/I193775966"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17758631,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"7430","last_page":"7442"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.3709000051021576,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.3709000051021576,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.10000000149011612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.0478999987244606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6894000172615051},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.5899999737739563},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.5792999863624573},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5200999975204468},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.517300009727478},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4839000105857849},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.40059998631477356},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.3950999975204468},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.359499990940094}],"concepts":[{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6894000172615051},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6128000020980835},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.5899999737739563},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.5792999863624573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5203999876976013},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5200999975204468},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.517300009727478},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4839000105857849},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48080000281333923},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.3950999975204468},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.359499990940094},{"id":"https://openalex.org/C5465570","wikidata":"https://www.wikidata.org/wiki/Q5326898","display_name":"Early stopping","level":3,"score":0.350600004196167},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.35040000081062317},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3449000120162964},{"id":"https://openalex.org/C125112378","wikidata":"https://www.wikidata.org/wiki/Q176640","display_name":"Randomness","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.3131999969482422},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3100000023841858},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.30559998750686646},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2937000095844269},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29350000619888306},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/access.2026.3651790","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3651790","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3651790","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3651790","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1499669280","https://openalex.org/W1615149086","https://openalex.org/W2024241221","https://openalex.org/W2047028564","https://openalex.org/W2056132907","https://openalex.org/W2122825543","https://openalex.org/W2135046866","https://openalex.org/W2148143831","https://openalex.org/W2149706766","https://openalex.org/W2152761983","https://openalex.org/W2261059368","https://openalex.org/W2295598076","https://openalex.org/W2487770199","https://openalex.org/W2586160710","https://openalex.org/W2783901534","https://openalex.org/W2789758093","https://openalex.org/W2908216486","https://openalex.org/W3093314742","https://openalex.org/W3202428668","https://openalex.org/W3216660278","https://openalex.org/W4212883601","https://openalex.org/W4232478844","https://openalex.org/W4240383254","https://openalex.org/W4252684946","https://openalex.org/W4287854749","https://openalex.org/W4294541781","https://openalex.org/W4297957988","https://openalex.org/W4361275302","https://openalex.org/W4400762160"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,79,163],"regularization":[4,167],"technique":[5],"called":[6],"sparse":[7,37,43],"probabilistic":[8,38,44],"split":[9],"(SPS),":[10],"which":[11],"introduces":[12],"controlled":[13],"randomness":[14],"into":[15],"the":[16,36,42,76,94,119,171],"node-splitting":[17],"process":[18],"of":[19,96,173],"decision":[20,88],"trees":[21],"to":[22,65,132,139,153],"mitigate":[23],"overfitting":[24],"and":[25,41,71,102,113,124,165],"enhance":[26],"generalization.":[27],"Building":[28],"on":[29],"this":[30,63],"technique,":[31],"we":[32],"propose":[33],"two":[34],"models:":[35],"tree":[39,46],"(SPT)":[40],"ensemble":[45,66,134],"(SPET).":[47],"SPT":[48,73],"improves":[49],"robustness":[50,138],"in":[51,176],"single-tree":[52],"settings":[53],"by":[54],"probabilistically":[55],"exploring":[56],"diverse":[57,154],"partition":[58],"structures,":[59],"while":[60],"SPET":[61,98,117,141],"extends":[62],"mechanism":[64],"learning,":[67],"promoting":[68],"model":[69],"diversity":[70],"stability.":[72],"successfully":[74],"handled":[75],"XOR":[77],"dataset,":[78],"representative":[80],"nonlinear":[81],"classification":[82],"problem":[83],"that":[84,160],"typically":[85],"challenges":[86],"traditional":[87],"trees.":[89],"In":[90],"additional":[91],"analyses":[92],"comparing":[93],"number":[95],"trees,":[97,107],"showed":[99],"greater":[100],"stability":[101],"accuracy":[103],"even":[104],"with":[105,127],"fewer":[106],"demonstrating":[108,136],"its":[109,151],"efficiency.":[110],"On":[111],"high-dimensional":[112],"class-imbalanced":[114],"manufacturing":[115],"datasets,":[116,149],"achieved":[118],"highest":[120],"average":[121],"F1":[122],"score":[123],"ROC":[125],"AUC":[126],"competitive":[128],"generalization":[129,172],"gaps":[130],"compared":[131],"widely-used":[133],"models,":[135],"strong":[137],"overfitting.":[140],"also":[142],"maintained":[143],"reliable":[144],"performance":[145],"across":[146],"public":[147],"benchmark":[148],"indicating":[150],"adaptability":[152],"data":[155],"environments.":[156],"These":[157],"results":[158],"suggest":[159],"SPS":[161],"is":[162],"lightweight":[164],"effective":[166],"strategy":[168],"for":[169],"improving":[170],"tree-based":[174],"models":[175],"practical":[177],"applications.":[178]},"counts_by_year":[],"updated_date":"2026-01-19T04:01:09.351973","created_date":"2026-01-14T00:00:00"}
