{"id":"https://openalex.org/W4318147439","doi":"https://doi.org/10.1109/bigdata55660.2022.10020842","title":"Model-free feature selection to facilitate automatic discovery of divergent subgroups in tabular data","display_name":"Model-free feature selection to facilitate automatic discovery of divergent subgroups in tabular data","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4318147439","doi":"https://doi.org/10.1109/bigdata55660.2022.10020842"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020842","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata55660.2022.10020842","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024549881","display_name":"Girmaw Abebe Tadesse","orcid":"https://orcid.org/0000-0002-2648-9102"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":true,"raw_author_name":"Girmaw Abebe Tadesse","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040949314","display_name":"William Ogallo","orcid":"https://orcid.org/0000-0001-8580-1656"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"William Ogallo","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063798437","display_name":"Celia Cintas","orcid":"https://orcid.org/0000-0002-8064-9189"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Celia Cintas","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029048857","display_name":"Skyler Speakman","orcid":"https://orcid.org/0000-0003-0337-2312"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Skyler Speakman","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5024549881"],"corresponding_institution_ids":["https://openalex.org/I4210162937"],"apc_list":null,"apc_paid":null,"fwci":0.1039,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.35046326,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"1","issue":null,"first_page":"6039","last_page":"6047"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11871","display_name":"Advanced Statistical Methods and Models","score":0.9611999988555908,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7721607685089111},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7591655850410461},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6004560589790344},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.563673734664917},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.55808424949646},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5251414775848389},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.48823049664497375},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.48125311732292175},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.46280401945114136},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3483244776725769}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7721607685089111},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7591655850410461},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6004560589790344},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.563673734664917},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.55808424949646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5251414775848389},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.48823049664497375},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.48125311732292175},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46280401945114136},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3483244776725769},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020842","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata55660.2022.10020842","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W80011902","https://openalex.org/W603636824","https://openalex.org/W1876967670","https://openalex.org/W1971073366","https://openalex.org/W1973036367","https://openalex.org/W2021581601","https://openalex.org/W2111854888","https://openalex.org/W2118561568","https://openalex.org/W2130949063","https://openalex.org/W2142047467","https://openalex.org/W2143426320","https://openalex.org/W2156483112","https://openalex.org/W2165047624","https://openalex.org/W2295598076","https://openalex.org/W2396881363","https://openalex.org/W2506743715","https://openalex.org/W2786827964","https://openalex.org/W2793232926","https://openalex.org/W2800559265","https://openalex.org/W2803881474","https://openalex.org/W2810292802","https://openalex.org/W2884061367","https://openalex.org/W2898227265","https://openalex.org/W2909716785","https://openalex.org/W2921849415","https://openalex.org/W2959053776","https://openalex.org/W2966284335","https://openalex.org/W2999615587","https://openalex.org/W3089028909","https://openalex.org/W3094948551","https://openalex.org/W3104422614","https://openalex.org/W3105524694","https://openalex.org/W3106543020","https://openalex.org/W3111021405","https://openalex.org/W3129166376","https://openalex.org/W3156669901","https://openalex.org/W3164751134","https://openalex.org/W4225355128","https://openalex.org/W4235260163","https://openalex.org/W4292121845","https://openalex.org/W6603280539","https://openalex.org/W6755712434","https://openalex.org/W6765502031","https://openalex.org/W6801076996","https://openalex.org/W6803934722"],"related_works":["https://openalex.org/W2162899405","https://openalex.org/W3113091479","https://openalex.org/W941090075","https://openalex.org/W17155033","https://openalex.org/W2044987316","https://openalex.org/W2237480245","https://openalex.org/W3134374554","https://openalex.org/W2519167559","https://openalex.org/W4386564352","https://openalex.org/W2952668426"],"abstract_inverted_index":{"Data-centric":[0],"AI":[1],"encourages":[2],"the":[3,174,213,226,230,260,264],"need":[4],"for":[5,64,105],"cleaning,":[6],"evaluating,":[7],"and":[8,27,130,142,153,184,196,199,222,232],"understanding":[9],"data":[10,50,107],"in":[11,116,133,229,263,278],"order":[12,117],"to":[13,25,42,61,118,128,135,139,160,168,182,241,258],"achieve":[14,242],"trustworthy":[15],"AI.":[16],"Existing":[17,101],"technologies,":[18],"such":[19,123],"as":[20],"AutoML,":[21],"make":[22],"it":[23,201],"easier":[24],"design":[26],"train":[28,143],"models":[29],"automatically,":[30],"but":[31,273],"there":[32],"is":[33,59,126],"a":[34,37,52,96,111,144,151,210,218,268,275],"lack":[35],"of":[36,40,48,55,75,89,164,176,212,220,248,271],"similar":[38,254],"level":[39],"capability":[41],"extract":[43],"data-centric":[44],"insights.":[45],"Manual":[46],"stratification":[47],"tabular":[49,106],"per":[51],"given":[53],"feature":[54,66,98,102,156,180,205,214],"interest":[56],"(e.g.,":[57,114],"gender)":[58],"limited":[60],"scaling":[62],"up":[63],"higher":[65],"dimension,":[67],"which":[68,91],"could":[69,92],"be":[70,93],"addressed":[71],"using":[72,95,259],"automatic":[73,80,155,162],"discovery":[74,81,163],"divergent/anomalous":[76],"subgroups.":[77,166],"Nonetheless,":[78],"these":[79],"techniques":[82,104],"often":[83,108],"search":[84],"across":[85,190],"potentially":[86],"exponential":[87],"combinations":[88],"features":[90,237,249],"simplified":[94],"preceding":[97],"selection":[99,103,125,157,170,206,215],"step.":[100],"involve":[109],"fitting":[110],"particular":[112],"model":[113],"XGBoost)":[115],"select":[119,185],"important":[120],"features.":[121,186],"However,":[122],"model-based":[124],"prone":[127],"model-bias":[129],"spurious":[131],"correlations":[132],"addition":[134],"requiring":[136],"extra":[137],"resources":[138],"design,":[140],"fine-tune":[141],"model.":[145],"In":[146],"this":[147],"paper,":[148],"we":[149,172],"propose":[150],"model-free":[152],"sparsity-based":[154],"(SAFS)":[158],"framework":[159],"facilitate":[161],"divergent":[165,255],"Different":[167],"filter-based":[169],"techniques,":[171],"exploit":[173],"sparsity":[175],"objective":[177],"measures":[178],"among":[179],"values":[181],"rank":[183],"We":[187],"validated":[188],"SAFS":[189,208,252],"two":[191],"publicly":[192],"available":[193],"datasets":[194],"(MIMIC-III":[195],"Allstate":[197],"Claims)":[198],"compared":[200,257],"with":[202,267,274],"six":[203],"existing":[204,227],"methods.":[207],"achieves":[209],"reduction":[211,277],"time":[216],"by":[217,251],"factor":[219],"81\u00d7":[221],"104\u00d7,":[223],"averaged":[224],"cross":[225],"methods":[228],"MIMIC-III":[231],"Claims":[233,265],"datasets,":[234],"respectively.":[235],"SAFS-selected":[236],"are":[238],"also":[239],"shown":[240],"competitive":[243],"detection":[244,279],"performance,":[245],"e.g.,":[246],"18.3%":[247],"selected":[250],"detected":[253],"group":[256],"whole":[261],"features,":[262],"dataset,":[266],"Jaccard":[269],"similarity":[270],"0.95":[272],"16\u00d7":[276],"time.":[280]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
