{"id":"https://openalex.org/W4380369591","doi":"https://doi.org/10.1145/3593013.3593988","title":"The Dataset Multiplicity Problem: How Unreliable Data Impacts Predictions","display_name":"The Dataset Multiplicity Problem: How Unreliable Data Impacts Predictions","publication_year":2023,"publication_date":"2023-06-12","ids":{"openalex":"https://openalex.org/W4380369591","doi":"https://doi.org/10.1145/3593013.3593988"},"language":"en","primary_location":{"id":"doi:10.1145/3593013.3593988","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3593013.3593988","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 ACM Conference on Fairness Accountability and Transparency","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051062831","display_name":"Anna P. Meyer","orcid":"https://orcid.org/0009-0008-9763-5585"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Anna P. Meyer","raw_affiliation_strings":["Department of Computer Sciences, University of Wisconsin - Madison, USA"],"raw_orcid":"https://orcid.org/0009-0008-9763-5585","affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, University of Wisconsin - Madison, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050593520","display_name":"Aws Albarghouthi","orcid":"https://orcid.org/0000-0003-4577-175X"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aws Albarghouthi","raw_affiliation_strings":["Department of Computer Sciences, University of Wisconsin - Madison, USA"],"raw_orcid":"https://orcid.org/0000-0003-4577-175X","affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, University of Wisconsin - Madison, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089920000","display_name":"Loris D\u2019Antoni","orcid":"https://orcid.org/0000-0001-9625-4037"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Loris D'Antoni","raw_affiliation_strings":["Department of Computer Sciences, University of Wisconsin - Madison, USA"],"raw_orcid":"https://orcid.org/0000-0001-9625-4037","affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, University of Wisconsin - Madison, USA","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5051062831"],"corresponding_institution_ids":["https://openalex.org/I135310074"],"apc_list":null,"apc_paid":null,"fwci":1.5337,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.85904986,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"193","last_page":"204"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14280","display_name":"Big Data Technologies and Applications","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6451331973075867},{"id":"https://openalex.org/keywords/multiplicity","display_name":"Multiplicity (mathematics)","score":0.6257693767547607},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.43140289187431335},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32595792412757874},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14637738466262817},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.07306012511253357}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6451331973075867},{"id":"https://openalex.org/C156004811","wikidata":"https://www.wikidata.org/wiki/Q2228257","display_name":"Multiplicity (mathematics)","level":2,"score":0.6257693767547607},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.43140289187431335},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32595792412757874},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14637738466262817},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.07306012511253357},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3593013.3593988","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3593013.3593988","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 ACM Conference on Fairness Accountability and Transparency","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2448944191","display_name":null,"funder_award_id":"1750965, 1763871, 2211968, 1918211, 2023222","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1484551447","https://openalex.org/W2073738917","https://openalex.org/W2119515738","https://openalex.org/W2137207609","https://openalex.org/W2138263042","https://openalex.org/W2142189376","https://openalex.org/W2161498332","https://openalex.org/W2318487091","https://openalex.org/W2487741598","https://openalex.org/W2489340779","https://openalex.org/W2559655401","https://openalex.org/W2559997609","https://openalex.org/W2560674852","https://openalex.org/W2566079294","https://openalex.org/W2604272474","https://openalex.org/W2730750269","https://openalex.org/W2784560833","https://openalex.org/W2809993608","https://openalex.org/W2913059114","https://openalex.org/W2962763344","https://openalex.org/W2964292098","https://openalex.org/W2995006168","https://openalex.org/W3048759177","https://openalex.org/W3100481436","https://openalex.org/W3100511085","https://openalex.org/W3123015728","https://openalex.org/W3124225934","https://openalex.org/W3132748670","https://openalex.org/W3133755170","https://openalex.org/W3135371071","https://openalex.org/W3138851644","https://openalex.org/W3157664688","https://openalex.org/W3164854573","https://openalex.org/W3189849087","https://openalex.org/W3212368439","https://openalex.org/W3212464620","https://openalex.org/W4212774754","https://openalex.org/W4253763531","https://openalex.org/W4283155630","https://openalex.org/W4283168572","https://openalex.org/W4283169532","https://openalex.org/W4297683907","https://openalex.org/W4300433216","https://openalex.org/W4313043655","https://openalex.org/W4320350303"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4378765297","https://openalex.org/W4230581126","https://openalex.org/W2761548024","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2016521071"],"abstract_inverted_index":{"We":[0,53,81],"introduce":[1],"dataset":[2,21,90,125,132,153],"multiplicity,":[3],"a":[4,25,93],"way":[5],"to":[6,56,60,84],"study":[7],"how":[8,55,83],"inaccuracies,":[9],"uncertainty,":[10],"and":[11,76,97,140,159],"social":[12,71],"bias":[13],"in":[14,66],"training":[15],"datasets":[16],"impact":[17],"test-time":[18,37],"predictions.":[19],"The":[20],"multiplicity":[22,91,133,154],"framework":[23,59],"asks":[24],"counterfactual":[26],"question":[27],"of":[28,32,50,64,89,99,130,152],"what":[29,136],"the":[30,51,87,128],"set":[31],"resultant":[33],"models":[34,102],"(and":[35],"associated":[36],"predictions)":[38],"would":[39],"be":[40,169],"if":[41],"we":[42,149],"could":[43],"somehow":[44],"access":[45],"all":[46],"hypothetical,":[47],"unbiased":[48],"versions":[49],"dataset.":[52],"discuss":[54,150],"use":[57],"this":[58],"encapsulate":[61],"various":[62],"sources":[63],"uncertainty":[65],"datasets\u2019":[67],"factualness,":[68],"including":[69,161],"systemic":[70],"bias,":[72],"data":[73],"collection":[74],"practices,":[75],"noisy":[77],"labels":[78],"or":[79],"features.":[80],"show":[82],"exactly":[85],"analyze":[86],"impacts":[88],"for":[92,155,163],"specific":[94],"model":[95,165],"architecture":[96],"type":[98],"uncertainty:":[100],"linear":[101],"with":[103],"label":[104],"errors.":[105],"Our":[106],"empirical":[107],"analysis":[108],"shows":[109],"that":[110],"real-world":[111],"datasets,":[112],"under":[113],"reasonable":[114],"assumptions,":[115],"contain":[116],"many":[117],"test":[118],"samples":[119,137],"whose":[120],"predictions":[121],"are":[122,138,145],"affected":[123],"by":[124],"multiplicity.":[126],"Furthermore,":[127],"choice":[129],"domain-specific":[131],"definition":[134],"determines":[135],"affected,":[139],"whether":[141],"different":[142],"demographic":[143],"groups":[144],"disparately":[146],"impacted.":[147],"Finally,":[148],"implications":[151],"machine":[156],"learning":[157],"practice":[158],"research,":[160],"considerations":[162],"when":[164],"outcomes":[166],"should":[167],"not":[168],"trusted.":[170]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
