{"id":"https://openalex.org/W4392843851","doi":"https://doi.org/10.1109/tkde.2024.3365524","title":"Automated Data Cleaning can Hurt Fairness in Machine Learning-Based Decision Making","display_name":"Automated Data Cleaning can Hurt Fairness in Machine Learning-Based Decision Making","publication_year":2024,"publication_date":"2024-02-14","ids":{"openalex":"https://openalex.org/W4392843851","doi":"https://doi.org/10.1109/tkde.2024.3365524"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2024.3365524","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2024.3365524","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066160649","display_name":"Shubha Guha","orcid":"https://orcid.org/0000-0002-9156-1561"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Shubha Guha","raw_affiliation_strings":["University of Amsterdam, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-9156-1561","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038710725","display_name":"Falaah Arif Khan","orcid":"https://orcid.org/0000-0002-4678-5929"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Falaah Arif Khan","raw_affiliation_strings":["New York University, New York, NY, USA"],"raw_orcid":"https://orcid.org/0000-0002-4678-5929","affiliations":[{"raw_affiliation_string":"New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082830839","display_name":"Julia Stoyanovich","orcid":"https://orcid.org/0000-0002-1587-0450"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Julia Stoyanovich","raw_affiliation_strings":["New York University, New York, NY, USA"],"raw_orcid":"https://orcid.org/0000-0002-1587-0450","affiliations":[{"raw_affiliation_string":"New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090934117","display_name":"Sebastian Schelter","orcid":"https://orcid.org/0000-0003-4722-5840"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Sebastian Schelter","raw_affiliation_strings":["University of Amsterdam, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0003-4722-5840","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5066160649"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":7.5903,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.97245059,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"36","issue":"12","first_page":"7368","last_page":"7379"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.8253999948501587,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.8253999948501587,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8382812142372131},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4326602518558502},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39094239473342896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8382812142372131},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4326602518558502},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39094239473342896}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2024.3365524","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2024.3365524","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2014352947","https://openalex.org/W2100960835","https://openalex.org/W2119803607","https://openalex.org/W2170712852","https://openalex.org/W2544486974","https://openalex.org/W2548122763","https://openalex.org/W2591700809","https://openalex.org/W2896331720","https://openalex.org/W2905588001","https://openalex.org/W2912887944","https://openalex.org/W2929941791","https://openalex.org/W2948145720","https://openalex.org/W2963917042","https://openalex.org/W2970773487","https://openalex.org/W3105610736","https://openalex.org/W3105977086","https://openalex.org/W3125924446","https://openalex.org/W3207970821","https://openalex.org/W4220991771","https://openalex.org/W4224952145","https://openalex.org/W4243565423","https://openalex.org/W4281400173","https://openalex.org/W4281770862","https://openalex.org/W4293055795","https://openalex.org/W4306405418","https://openalex.org/W6677534882","https://openalex.org/W6737419302","https://openalex.org/W6748382702","https://openalex.org/W6751917733","https://openalex.org/W6770908494","https://openalex.org/W6793012311","https://openalex.org/W6799868450","https://openalex.org/W6803112758"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347","https://openalex.org/W4210805261"],"abstract_inverted_index":{"In":[0],"this":[1,213,270],"paper,":[2],"we":[3,118,172,210,222,272],"interrogate":[4],"whether":[5,20],"data":[6,22,53,87,106,129,144,228,235,260,266,282],"quality":[7,88,107],"issues":[8],"track":[9,111],"demographic":[10,112,249],"group":[11,113,194],"membership":[12,100],"(based":[13],"on":[14,55,124,131,187,243],"sex,":[15],"race":[16],"and":[17,19,193,203,238,255,263,285],"age)":[18],"automated":[21,128,143],"cleaning":[23,54,130,145,165,183,236,283],"\u2014":[24,34],"of":[25,38,47,52,94,127,180,190,234,240,280],"the":[26,36,45,50,65,70,125,164,175,188,232,278],"kind":[27],"commonly":[28],"used":[29],"in":[30,57,64,78,101,212,227,231],"production":[31],"ML":[32,244],"systems":[33],"impacts":[35,239],"fairness":[37,56,157,191],"predictions":[39],"made":[40],"by":[41,73,259],"these":[42],"systems.":[43],"To":[44],"best":[46],"our":[48,201],"knowledge,":[49],"impact":[51,126,179],"downstream":[58],"tasks":[59],"has":[60],"not":[61,109,168],"been":[62],"investigated":[63],"literature.":[66],"We":[67,82,139,199],"first":[68],"analyse":[69],"tuples":[71],"flagged":[72],"common":[74],"error":[75],"detection":[76],"strategies":[77],"five":[79],"research":[80,275],"datasets.":[81],"find":[83,173],"that,":[84,141],"while":[85,142],"specific":[86],"issues,":[89],"such":[90,241],"as":[91],"higher":[92],"rates":[93],"missing":[95],"values,":[96],"are":[97,167],"associated":[98],"with":[99],"historically":[102],"disadvantaged":[103],"groups,":[104],"poor":[105],"does":[108],"generally":[110],"membership.":[114],"As":[115],"a":[116,120,181],"follow-up,":[117],"conduct":[119],"large-scale":[121],"empirical":[122],"study":[123],"fairness,":[132],"involving":[133],"more":[134,153],"than":[135,158],"26,000":[136],"model":[137,245],"evaluations.":[138],"observe":[140],"is":[146,152,215],"unlikely":[147],"to":[148,155,159],"worsen":[149,156],"accuracy,":[150],"it":[151,219],"likely":[154],"improve":[160],"it,":[161],"especially":[162],"when":[163],"techniques":[166],"carefully":[169],"chosen.":[170],"Furthermore,":[171],"that":[174,221],"positive":[176],"or":[177,197],"negative":[178],"particular":[182],"technique":[184],"often":[185],"depends":[186],"choice":[189],"metric":[192],"definition":[195],"(single-attribute":[196],"intersectional).":[198],"make":[200],"code":[202],"experimental":[204],"results":[205],"publicly":[206],"available.":[207],"The":[208],"analysis":[209,253],"conducted":[211],"paper":[214],"difficult,":[216],"primarily":[217],"because":[218],"requires":[220,264],"think":[223],"holistically":[224],"about":[225],"disparities":[226,230,242],"quality,":[229],"effectiveness":[233],"methods,":[237,284],"performance":[246],"for":[247,291],"different":[248],"groups.":[250],"Such":[251],"holistic":[252],"can":[254],"should":[256],"be":[257],"supported":[258],"engineering":[261,267],"tools,":[262],"substantial":[265],"research.":[268],"Towards":[269],"goal,":[271],"discuss":[273],"open":[274],"questions,":[276],"envision":[277],"development":[279],"fairness-aware":[281],"their":[286],"integration":[287],"into":[288],"complex":[289],"pipelines":[290],"ML-based":[292],"decision":[293],"making.":[294]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":13}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
