{"id":"https://openalex.org/W4221138850","doi":"https://doi.org/10.1145/3531146.3533203","title":"Adaptive Sampling Strategies to Construct Equitable Training Datasets","display_name":"Adaptive Sampling Strategies to Construct Equitable Training Datasets","publication_year":2022,"publication_date":"2022-06-20","ids":{"openalex":"https://openalex.org/W4221138850","doi":"https://doi.org/10.1145/3531146.3533203"},"language":"en","primary_location":{"id":"doi:10.1145/3531146.3533203","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3531146.3533203","pdf_url":null,"source":{"id":"https://openalex.org/S4363608463","display_name":"2022 ACM Conference on Fairness, Accountability, and Transparency","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 ACM Conference on Fairness Accountability and Transparency","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103971646","display_name":"William Cai","orcid":"https://orcid.org/0000-0003-1769-5112"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"William Cai","raw_affiliation_strings":["Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Stanford, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050977030","display_name":"Ro Encarnaci\u00f3n","orcid":"https://orcid.org/0000-0002-2562-4907"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ro Encarnacion","raw_affiliation_strings":["Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Stanford, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051670362","display_name":"Bobbie Chern","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bobbie Chern","raw_affiliation_strings":["Meta, USA"],"affiliations":[{"raw_affiliation_string":"Meta, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026829784","display_name":"Sam Corbett\u2010Davies","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sam Corbett-Davies","raw_affiliation_strings":["Meta, USA"],"affiliations":[{"raw_affiliation_string":"Meta, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056913696","display_name":"Miranda Bogen","orcid":"https://orcid.org/0009-0006-8874-8583"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miranda Bogen","raw_affiliation_strings":["Meta, USA"],"affiliations":[{"raw_affiliation_string":"Meta, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000039831","display_name":"Stevie Bergman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stevie Bergman","raw_affiliation_strings":["Meta, USA"],"affiliations":[{"raw_affiliation_string":"Meta, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027036879","display_name":"Sharad Goel","orcid":"https://orcid.org/0000-0002-6103-9318"},"institutions":[{"id":"https://openalex.org/I4210141641","display_name":"IIT@Harvard","ror":"https://ror.org/044hpwe09","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210141641"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sharad Goel","raw_affiliation_strings":["Harvard, USA"],"affiliations":[{"raw_affiliation_string":"Harvard, USA","institution_ids":["https://openalex.org/I4210141641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5103971646"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":12.9296,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.99009901,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1467","last_page":"1478"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11287","display_name":"Cancer Genomics and Diagnostics","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11287","display_name":"Cancer Genomics and Diagnostics","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10845","display_name":"Advanced Causal Inference Techniques","score":0.9783999919891357,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7443903684616089},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6109899282455444},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5921081304550171},{"id":"https://openalex.org/keywords/operationalization","display_name":"Operationalization","score":0.5535314679145813},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5213692784309387},{"id":"https://openalex.org/keywords/data-collection","display_name":"Data collection","score":0.5206795930862427},{"id":"https://openalex.org/keywords/representativeness-heuristic","display_name":"Representativeness heuristic","score":0.5076278448104858},{"id":"https://openalex.org/keywords/adaptive-sampling","display_name":"Adaptive sampling","score":0.47172775864601135},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4412876069545746},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.43826791644096375},{"id":"https://openalex.org/keywords/sample-size-determination","display_name":"Sample size determination","score":0.4116637408733368},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1665399670600891},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09593087434768677}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7443903684616089},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6109899282455444},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5921081304550171},{"id":"https://openalex.org/C9354725","wikidata":"https://www.wikidata.org/wiki/Q286017","display_name":"Operationalization","level":2,"score":0.5535314679145813},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5213692784309387},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.5206795930862427},{"id":"https://openalex.org/C37381756","wikidata":"https://www.wikidata.org/wiki/Q20203288","display_name":"Representativeness heuristic","level":2,"score":0.5076278448104858},{"id":"https://openalex.org/C2781395549","wikidata":"https://www.wikidata.org/wiki/Q4680762","display_name":"Adaptive sampling","level":3,"score":0.47172775864601135},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4412876069545746},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.43826791644096375},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.4116637408733368},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1665399670600891},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09593087434768677},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3531146.3533203","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3531146.3533203","pdf_url":null,"source":{"id":"https://openalex.org/S4363608463","display_name":"2022 ACM Conference on Fairness, Accountability, and Transparency","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 ACM Conference on Fairness Accountability and Transparency","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1961345416","https://openalex.org/W2021064404","https://openalex.org/W2083685419","https://openalex.org/W2097246321","https://openalex.org/W2100960835","https://openalex.org/W2107862628","https://openalex.org/W2110236805","https://openalex.org/W2137507956","https://openalex.org/W2530395818","https://openalex.org/W2531587846","https://openalex.org/W2584805976","https://openalex.org/W2598022332","https://openalex.org/W2809878087","https://openalex.org/W2886752110","https://openalex.org/W2897042519","https://openalex.org/W2902054973","https://openalex.org/W2909212904","https://openalex.org/W2911227954","https://openalex.org/W2922534626","https://openalex.org/W2949678053","https://openalex.org/W2951641704","https://openalex.org/W2962059918","https://openalex.org/W2962922665","https://openalex.org/W2962951800","https://openalex.org/W2963718755","https://openalex.org/W2964235839","https://openalex.org/W3000875740","https://openalex.org/W3004832625","https://openalex.org/W3012624518","https://openalex.org/W3021436325","https://openalex.org/W3090495773","https://openalex.org/W3100279624","https://openalex.org/W3110169235","https://openalex.org/W3133726592","https://openalex.org/W3212368439","https://openalex.org/W4287239856","https://openalex.org/W4289258088","https://openalex.org/W6728551298"],"related_works":["https://openalex.org/W187420932","https://openalex.org/W2513185592","https://openalex.org/W2197861887","https://openalex.org/W1771779360","https://openalex.org/W305124712","https://openalex.org/W4308858364","https://openalex.org/W1979887339","https://openalex.org/W4238827538","https://openalex.org/W591735475","https://openalex.org/W3123458537"],"abstract_inverted_index":{"In":[0,232],"domains":[1],"ranging":[2],"from":[3,102,205],"computer":[4],"vision":[5],"to":[6,16,32,55,91,98,239],"natural":[7],"language":[8],"processing,":[9],"machine":[10],"learning":[11,129,153,181],"models":[12,45],"have":[13],"been":[14],"shown":[15],"exhibit":[17],"stark":[18],"disparities,":[19],"often":[20,51,203],"performing":[21],"worse":[22],"for":[23,76,212],"members":[24],"of":[25,39,66,121,140,151,179,192],"traditionally":[26],"underserved":[27],"groups.":[28],"One":[29],"factor":[30],"contributing":[31],"these":[33],"performance":[34,123],"gaps":[35],"is":[36,50],"a":[37,73,82,85,93,111,119,189],"lack":[38],"representation":[40],"in":[41,58,115],"the":[42,44,64,148,152,180],"data":[43,95,101,156,207],"are":[46,159],"trained":[47],"on.":[48],"It":[49],"unclear,":[52],"however,":[53],"how":[54,90],"operationalize":[56],"representativeness":[57],"specific":[59],"applications.":[60],"Here":[61],"we":[62,162,187,218],"formalize":[63],"problem":[65,170],"creating":[67],"equitable":[68,247],"training":[69,100],"datasets,":[70],"and":[71,131,142,229],"propose":[72],"statistical":[74,149],"framework":[75],"addressing":[77],"this":[78,168,233],"problem.":[79],"We":[80,105],"consider":[81],"setting":[83],"where":[84],"model":[86],"builder":[87],"must":[88],"decide":[89],"allocate":[92],"fixed":[94],"collection":[96,157],"budget":[97],"gather":[99],"different":[103],"subgroups.":[104],"then":[106],"frame":[107],"dataset":[108],"creation":[109],"as":[110,145,147],"constrained":[112],"optimization":[113,169],"problem,":[114],"which":[116],"one":[117],"maximizes":[118],"function":[120],"group-specific":[122,128,215],"metrics":[124],"based":[125],"on":[126,196],"(estimated)":[127],"rates":[130],"costs":[132],"per":[133],"sample.":[134],"This":[135],"flexible":[136],"approach":[137,223],"incorporates":[138],"preferences":[139],"model-builders":[141],"other":[143],"stakeholders,":[144],"well":[146],"properties":[150],"task.":[154],"When":[155,209],"decisions":[158,241],"made":[160],"sequentially,":[161],"show":[163],"that":[164,202,220],"under":[165],"certain":[166],"conditions":[167],"can":[171],"be":[172],"efficiently":[173],"solved":[174],"even":[175],"without":[176],"prior":[177],"knowledge":[178],"rates.":[182],"To":[183],"illustrate":[184],"our":[185,221],"approach,":[186],"conduct":[188],"simulation":[190],"study":[191],"polygenic":[193],"risk":[194],"scores":[195],"synthetic":[197],"genomic":[198],"data\u2014an":[199],"application":[200],"domain":[201],"suffers":[204],"non-representative":[206],"collection.":[208],"optimizing":[210],"policies":[211],"overall":[213],"or":[214,246],"average":[216],"health,":[217],"find":[219],"adaptive":[222],"outperforms":[224],"heuristic":[225],"strategies,":[226],"including":[227],"equal":[228,235,245],"representative":[230],"sampling.":[231],"sense,":[234],"treatment":[236],"with":[237],"respect":[238],"sampling":[240],"does":[242],"not":[243],"guarantee":[244],"outcomes.":[248]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
