{"id":"https://openalex.org/W7151362913","doi":"https://doi.org/10.48550/arxiv.2604.03478","title":"Investigating Data Interventions for Subgroup Fairness: An ICU Case Study","display_name":"Investigating Data Interventions for Subgroup Fairness: An ICU Case Study","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7151362913","doi":"https://doi.org/10.48550/arxiv.2604.03478"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03478","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03478","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03478","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133093298","display_name":"Erin Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Erin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133144227","display_name":"Judy Hanwen Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Judy Hanwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133115179","display_name":"Irene Y. Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Irene Y.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.43549999594688416,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.43549999594688416,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.18449999392032623,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.10400000214576721,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5989000201225281},{"id":"https://openalex.org/keywords/psychological-intervention","display_name":"Psychological intervention","score":0.5565999746322632},{"id":"https://openalex.org/keywords/harm","display_name":"Harm","score":0.4496999979019165},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.4074000120162964},{"id":"https://openalex.org/keywords/subgroup-analysis","display_name":"Subgroup analysis","score":0.3991999924182892},{"id":"https://openalex.org/keywords/selection-bias","display_name":"Selection bias","score":0.39239999651908875},{"id":"https://openalex.org/keywords/sample-size-determination","display_name":"Sample size determination","score":0.3862999975681305},{"id":"https://openalex.org/keywords/clinical-trial","display_name":"Clinical trial","score":0.3603000044822693},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.32199999690055847}],"concepts":[{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5989000201225281},{"id":"https://openalex.org/C27415008","wikidata":"https://www.wikidata.org/wiki/Q7256382","display_name":"Psychological intervention","level":2,"score":0.5565999746322632},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5108000040054321},{"id":"https://openalex.org/C2777363581","wikidata":"https://www.wikidata.org/wiki/Q15098235","display_name":"Harm","level":2,"score":0.4496999979019165},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.4262999892234802},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.4074000120162964},{"id":"https://openalex.org/C187960798","wikidata":"https://www.wikidata.org/wiki/Q7631152","display_name":"Subgroup analysis","level":3,"score":0.3991999924182892},{"id":"https://openalex.org/C40423286","wikidata":"https://www.wikidata.org/wiki/Q284172","display_name":"Selection bias","level":2,"score":0.39239999651908875},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.3862999975681305},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3781999945640564},{"id":"https://openalex.org/C535046627","wikidata":"https://www.wikidata.org/wiki/Q30612","display_name":"Clinical trial","level":2,"score":0.3603000044822693},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3411000072956085},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3343000113964081},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.32199999690055847},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3061999976634979},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.29910001158714294},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2930999994277954},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C2779318504","wikidata":"https://www.wikidata.org/wiki/Q1438035","display_name":"Research design","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C17923572","wikidata":"https://www.wikidata.org/wiki/Q7250160","display_name":"Propensity score matching","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.25850000977516174},{"id":"https://openalex.org/C2780877353","wikidata":"https://www.wikidata.org/wiki/Q2518253","display_name":"Health services research","level":3,"score":0.25690001249313354},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.25519999861717224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03478","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03478","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03478","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03478","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8166542649269104,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0,37,59,91],"high-stakes":[1],"settings":[2],"where":[3,53],"machine":[4],"learning":[5],"models":[6,112],"are":[7,55,79,113,166],"used":[8],"to":[9,23,40,102,177,186],"automate":[10],"decision-making":[11],"about":[12],"individuals,":[13],"the":[14,33,42,46,62,73,82,88,96,107,136,142,180,193],"presence":[15],"of":[16,26,64,84,98,109,119,182,196],"algorithmic":[17],"bias":[18],"can":[19,150],"exacerbate":[20],"systemic":[21],"harm":[22],"certain":[24],"subgroups":[25],"people.":[27],"These":[28],"biases":[29],"often":[30],"stem":[31],"from":[32,75,126],"underlying":[34],"training":[35,89],"data.":[36],"practice,":[38],"interventions":[39],"\"fix":[41],"data\"":[43,198],"depend":[44],"on":[45,67,116],"actual":[47],"additional":[48],"data":[49,65,100,125,148,164],"sources":[50,101],"available":[51],"--":[52],"many":[54,160],"less":[56],"than":[57],"ideal.":[58],"these":[60],"cases,":[61],"effects":[63],"scaling":[66],"subgroup":[68,104,188],"performance":[69,105],"become":[70],"volatile,":[71],"as":[72],"improvements":[74],"increased":[76],"sample":[77],"size":[78],"counteracted":[80],"by":[81,203],"introduction":[83],"distribution":[85],"shifts":[86],"in":[87],"set.":[90],"this":[92],"paper,":[93],"we":[94,145],"investigate":[95],"limitations":[97],"combining":[99,206],"improve":[103,187],"within":[106],"context":[108],"healthcare.":[110],"Clinical":[111],"commonly":[114],"trained":[115],"datasets":[117],"comprised":[118],"patient":[120],"electronic":[121],"health":[122],"record":[123],"(EHR)":[124],"different":[127],"hospitals":[128],"or":[129],"admission":[130],"departments.":[131],"Across":[132],"two":[133],"such":[134],"datasets,":[135],"eICU":[137],"Collaborative":[138],"Research":[139],"Database":[140],"and":[141,153,157,159,173,205,208],"MIMIC-IV":[143],"dataset,":[144],"find":[146,178],"that":[147,179],"addition":[149,175],"both":[151,183],"help":[152],"hurt":[154],"model":[155],"fairness":[156,201],"performance,":[158],"intuitive":[161],"strategies":[162,176],"for":[163,199],"selection":[165],"unreliable.":[167],"We":[168],"compare":[169],"model-based":[170,209],"post-hoc":[171],"calibration":[172],"data-centric":[174],"combination":[181],"is":[184],"important":[185],"performance.":[189],"Our":[190],"work":[191],"questions":[192],"traditional":[194],"dogma":[195],"\"better":[197],"overcoming":[200],"challenges":[202],"comparing":[204],"data-":[207],"approaches.":[210]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-08T00:00:00"}
