{"id":"https://openalex.org/W4413279439","doi":"https://doi.org/10.1145/3761822","title":"Sample Selection Bias in Machine Learning for Healthcare","display_name":"Sample Selection Bias in Machine Learning for Healthcare","publication_year":2025,"publication_date":"2025-08-18","ids":{"openalex":"https://openalex.org/W4413279439","doi":"https://doi.org/10.1145/3761822"},"language":"en","primary_location":{"id":"doi:10.1145/3761822","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3761822","pdf_url":null,"source":{"id":"https://openalex.org/S4210174653","display_name":"ACM Transactions on Computing for Healthcare","issn_l":"2637-8051","issn":["2637-8051","2691-1957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Computing for Healthcare","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030433440","display_name":"Vinod Kumar Chauhan","orcid":"https://orcid.org/0000-0001-8195-548X"},"institutions":[{"id":"https://openalex.org/I181647926","display_name":"University of Strathclyde","ror":"https://ror.org/00n3w3b69","country_code":"GB","type":"education","lineage":["https://openalex.org/I181647926"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Vinod Kumar Chauhan","raw_affiliation_strings":["Department of Engineering Science, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland and Department of Computer and Information Sciences, University of Strathclyde, Glasgow, United Kingdom of Great Britain and Northern Ireland","Department of Engineering Science, University of Oxford, and Department of Computer and Information Sciences, University of Strathclyde, UK"],"raw_orcid":"https://orcid.org/0000-0001-8195-548X","affiliations":[{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland and Department of Computer and Information Sciences, University of Strathclyde, Glasgow, United Kingdom of Great Britain and Northern Ireland","institution_ids":["https://openalex.org/I181647926"]},{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, and Department of Computer and Information Sciences, University of Strathclyde, UK","institution_ids":["https://openalex.org/I181647926"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055138562","display_name":"Lei Clifton","orcid":"https://orcid.org/0000-0001-5595-8468"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lei Clifton","raw_affiliation_strings":["Nuffield Department of Primary Care Health Sciences, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland","Nuffield Department of Primary Care Health Sciences, University of Oxford, UK"],"raw_orcid":"https://orcid.org/0000-0001-5595-8468","affiliations":[{"raw_affiliation_string":"Nuffield Department of Primary Care Health Sciences, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland","institution_ids":["https://openalex.org/I40120149"]},{"raw_affiliation_string":"Nuffield Department of Primary Care Health Sciences, University of Oxford, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046061272","display_name":"Achille Sala\u00fcn","orcid":"https://orcid.org/0000-0001-5455-9115"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210146410","display_name":"Science Oxford","ror":"https://ror.org/04j8yhy50","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210146410"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Achille Sala\u00fcn","raw_affiliation_strings":["Department of Engineering Science, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland","Department of Engineering Science, University of Oxford, UK"],"raw_orcid":"https://orcid.org/0000-0001-5455-9115","affiliations":[{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland","institution_ids":["https://openalex.org/I4210146410"]},{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036444950","display_name":"Huiqi Lu","orcid":"https://orcid.org/0000-0002-6140-3394"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210146410","display_name":"Science Oxford","ror":"https://ror.org/04j8yhy50","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210146410"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Huiqi Yvonne Lu","raw_affiliation_strings":["Department of Engineering Science, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland","Department of Engineering Science, University of Oxford, UK"],"raw_orcid":"https://orcid.org/0000-0002-6140-3394","affiliations":[{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland","institution_ids":["https://openalex.org/I4210146410"]},{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108519143","display_name":"Kim Branson","orcid":null},"institutions":[{"id":"https://openalex.org/I2800110054","display_name":"Age UK","ror":"https://ror.org/050x9d346","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I2800110054"]},{"id":"https://openalex.org/I4210139743","display_name":"Geotechnical Consulting Group (United Kingdom)","ror":"https://ror.org/04fcd0528","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210139743"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kim Branson","raw_affiliation_strings":["GSK PLC, London, UK","Biomedical AI Group, GSK, UK"],"raw_orcid":"https://orcid.org/0009-0004-5699-6369","affiliations":[{"raw_affiliation_string":"GSK PLC, London, UK","institution_ids":["https://openalex.org/I4210139743"]},{"raw_affiliation_string":"Biomedical AI Group, GSK, UK","institution_ids":["https://openalex.org/I2800110054"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033154792","display_name":"Patrick Schwab","orcid":"https://orcid.org/0000-0002-2868-7794"},"institutions":[{"id":"https://openalex.org/I2800110054","display_name":"Age UK","ror":"https://ror.org/050x9d346","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I2800110054"]},{"id":"https://openalex.org/I4210139743","display_name":"Geotechnical Consulting Group (United Kingdom)","ror":"https://ror.org/04fcd0528","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210139743"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Patrick Schwab","raw_affiliation_strings":["GSK PLC, London, UK","Biomedical AI Group, GSK, UK"],"raw_orcid":"https://orcid.org/0000-0002-2868-7794","affiliations":[{"raw_affiliation_string":"GSK PLC, London, UK","institution_ids":["https://openalex.org/I4210139743"]},{"raw_affiliation_string":"Biomedical AI Group, GSK, UK","institution_ids":["https://openalex.org/I2800110054"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076685598","display_name":"Gaurav Nigam","orcid":"https://orcid.org/0000-0003-4699-2263"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Gaurav Nigam","raw_affiliation_strings":["Nuffield Department of Medicine, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland","Nuffield Department of Medicine, University of Oxford, Oxford, UK"],"raw_orcid":"https://orcid.org/0000-0003-4699-2263","affiliations":[{"raw_affiliation_string":"Nuffield Department of Medicine, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland","institution_ids":["https://openalex.org/I40120149"]},{"raw_affiliation_string":"Nuffield Department of Medicine, University of Oxford, Oxford, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040302008","display_name":"David A. Clifton","orcid":"https://orcid.org/0000-0002-9848-8555"},"institutions":[{"id":"https://openalex.org/I4210125878","display_name":"Suzhou Research Institute","ror":"https://ror.org/03ebk0c60","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210125878"]},{"id":"https://openalex.org/I4210146410","display_name":"Science Oxford","ror":"https://ror.org/04j8yhy50","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210146410"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"David A. Clifton","raw_affiliation_strings":["Department of Engineering Science, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland and Oxford-Suzhou Institute of Advanced Research (OSCAR), Suzhou, China","Oxford-Suzhou Institute of Advanced Research (OSCAR), Suzhou, China, and Department of Engineering Science, University of Oxford, UK"],"raw_orcid":"https://orcid.org/0000-0002-9848-8555","affiliations":[{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, Oxford, United Kingdom of Great Britain and Northern Ireland and Oxford-Suzhou Institute of Advanced Research (OSCAR), Suzhou, China","institution_ids":["https://openalex.org/I4210146410"]},{"raw_affiliation_string":"Oxford-Suzhou Institute of Advanced Research (OSCAR), Suzhou, China, and Department of Engineering Science, University of Oxford, UK","institution_ids":["https://openalex.org/I4210125878"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5030433440"],"corresponding_institution_ids":["https://openalex.org/I181647926"],"apc_list":null,"apc_paid":null,"fwci":3.4248,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.93213184,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"6","issue":"4","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6505758762359619},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6081693768501282},{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.5847408771514893},{"id":"https://openalex.org/keywords/selection-bias","display_name":"Selection bias","score":0.533977746963501},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4889165461063385},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45896223187446594},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.44325584173202515},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40788954496383667},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.24649527668952942},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.16351664066314697},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13653630018234253},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.0690305233001709}],"concepts":[{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6505758762359619},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6081693768501282},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.5847408771514893},{"id":"https://openalex.org/C40423286","wikidata":"https://www.wikidata.org/wiki/Q284172","display_name":"Selection bias","level":2,"score":0.533977746963501},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4889165461063385},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45896223187446594},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.44325584173202515},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40788954496383667},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.24649527668952942},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.16351664066314697},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13653630018234253},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0690305233001709},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3761822","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3761822","pdf_url":null,"source":{"id":"https://openalex.org/S4210174653","display_name":"ACM Transactions on Computing for Healthcare","issn_l":"2637-8051","issn":["2637-8051","2691-1957"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Computing for Healthcare","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W932025339","https://openalex.org/W1490308602","https://openalex.org/W1608417949","https://openalex.org/W2034368206","https://openalex.org/W2043324736","https://openalex.org/W2064186732","https://openalex.org/W2211188029","https://openalex.org/W2590957573","https://openalex.org/W2594899909","https://openalex.org/W2781526984","https://openalex.org/W2803760365","https://openalex.org/W2907554860","https://openalex.org/W2934399013","https://openalex.org/W2949392217","https://openalex.org/W2979509742","https://openalex.org/W3047674006","https://openalex.org/W3088943327","https://openalex.org/W3118929067","https://openalex.org/W3170360335","https://openalex.org/W3172011038","https://openalex.org/W3199794406","https://openalex.org/W3205266373","https://openalex.org/W3207434859","https://openalex.org/W3209568516","https://openalex.org/W4240662509","https://openalex.org/W4281700759","https://openalex.org/W4300772090","https://openalex.org/W4379768680","https://openalex.org/W4383682367","https://openalex.org/W4388152645","https://openalex.org/W4388297464","https://openalex.org/W4389430220","https://openalex.org/W4393227196","https://openalex.org/W4393334015","https://openalex.org/W4396624917","https://openalex.org/W4401548252","https://openalex.org/W4413273616","https://openalex.org/W7044050609"],"related_works":["https://openalex.org/W4205762803","https://openalex.org/W2535856026","https://openalex.org/W2265065644","https://openalex.org/W1185300216","https://openalex.org/W2134699697","https://openalex.org/W3017188156","https://openalex.org/W2322875716","https://openalex.org/W2961085424","https://openalex.org/W1884218869","https://openalex.org/W2159994411"],"abstract_inverted_index":{"While":[0],"machine":[1,69,76,125],"learning":[2,70,77,126],"algorithms":[3],"hold":[4],"promise":[5],"for":[6,71,136,162,184,211,230],"personalised":[7],"medicine,":[8],"their":[9],"clinical":[10],"adoption":[11],"remains":[12,65],"limited,":[13],"partly":[14],"due":[15],"to":[16,52,80,201],"biases":[17],"that":[18,197,234],"can":[19,199],"compromise":[20],"the":[21,41,48,62,74,82,89,92,111,122,141,147,169,176,180,185,206,212,218,228,231,238,241,245,267],"reliability":[22],"of":[23,38,47,101,124,175,208,237],"predictions.":[24],"In":[25],"this":[26],"article,":[27],"we":[28,130,151],"focus":[29],"on":[30,121,140],"sample":[31],"selection":[32,264],"bias":[33,39,83,148,269],"(SSB),":[34],"a":[35,99,132,158,202,224],"specific":[36],"type":[37],"where":[40,165],"study":[42,90,109,177,219,246],"population":[43,143,178,214],"is":[44,173],"less":[45],"representative":[46,174,236],"target":[49,93,142,170,213,232],"population,":[50,220],"leading":[51],"biased":[53],"and":[54,91,157,179,193,240,263],"potentially":[55],"harmful":[56],"decisions.":[57],"Despite":[58],"being":[59],"well-known":[60],"in":[61,68,98,205,227],"literature,":[63],"SSB":[64,116,198],"scarcely":[66],"studied":[67],"healthcare.":[72],"Moreover,":[73],"existing":[75,268],"techniques":[78,251],"try":[79],"correct":[81],"mostly":[84],"by":[85,117],"balancing":[86],"distributions":[87],"between":[88],"populations,":[94],"which":[95,172],"may":[96],"result":[97],"loss":[100],"predictive":[102],"performance.":[103],"To":[104],"address":[105],"these":[106],"problems,":[107],"our":[108,249],"illustrates":[110],"potential":[112],"risks":[113],"associated":[114],"with":[115,191,217],"examining":[118],"SSB\u2019s":[119],"impact":[120],"performance":[123,207,229],"algorithms.":[127],"Most":[128],"importantly,":[129],"propose":[131,152],"new":[133],"research":[134],"direction":[135],"addressing":[137,163],"SSB,":[138,164],"based":[139],"identification":[144],"rather":[145],"than":[146],"correction.":[149],"Specifically,":[150],"two":[153],"independent":[154],"networks":[155],"(T-Net)":[156],"multitasking":[159],"network":[160],"(MT-Net)":[161],"one":[166],"network/task":[167],"identifies":[168],"subpopulation":[171],"second":[181],"makes":[182],"predictions":[183],"identified":[186],"subpopulation.":[187],"Our":[188],"empirical":[189],"results":[190],"synthetic":[192],"semi-synthetic":[194],"datasets":[195],"highlight":[196],"lead":[200],"large":[203],"drop":[204],"an":[209],"algorithm":[210],"as":[215,221,223],"compared":[216],"well":[222],"substantial":[225],"difference":[226],"subpopulations":[233],"are":[235],"selected":[239],"non-selected":[242],"patients":[243],"from":[244],"population.":[247],"Furthermore,":[248],"proposed":[250],"demonstrate":[252],"robustness":[253],"across":[254],"various":[255],"settings,":[256],"including":[257],"different":[258],"dataset":[259],"sizes,":[260],"event":[261],"rates":[262],"rates,":[265],"outperforming":[266],"correction":[270],"techniques.":[271]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
