{"id":"https://openalex.org/W4280621405","doi":"https://doi.org/10.1145/3531146.3533233","title":"Evaluation Gaps in Machine Learning Practice","display_name":"Evaluation Gaps in Machine Learning Practice","publication_year":2022,"publication_date":"2022-06-20","ids":{"openalex":"https://openalex.org/W4280621405","doi":"https://doi.org/10.1145/3531146.3533233"},"language":"en","primary_location":{"id":"doi:10.1145/3531146.3533233","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3531146.3533233","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3531146.3533233","source":{"id":"https://openalex.org/S4363608463","display_name":"2022 ACM Conference on Fairness, Accountability, and Transparency","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 ACM Conference on Fairness Accountability and Transparency","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3531146.3533233","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071599724","display_name":"Ben Hutchinson","orcid":"https://orcid.org/0000-0003-2253-6204"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ben Hutchinson","raw_affiliation_strings":["Google Research, Australia"],"affiliations":[{"raw_affiliation_string":"Google Research, Australia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072851389","display_name":"Negar Rostamzadeh","orcid":"https://orcid.org/0000-0002-9638-4664"},"institutions":[{"id":"https://openalex.org/I4210148186","display_name":"Google (Canada)","ror":"https://ror.org/04d06q394","country_code":"CA","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969","https://openalex.org/I4210148186"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Negar Rostamzadeh","raw_affiliation_strings":["Google Research, Canada"],"affiliations":[{"raw_affiliation_string":"Google Research, Canada","institution_ids":["https://openalex.org/I4210148186"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042479413","display_name":"Christina M. Greer","orcid":"https://orcid.org/0000-0003-0547-0733"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christina Greer","raw_affiliation_strings":["Google Research, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014018142","display_name":"Katherine Heller","orcid":"https://orcid.org/0000-0002-4848-7466"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Katherine Heller","raw_affiliation_strings":["Google Research, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019297976","display_name":"Vinodkumar Prabhakaran","orcid":"https://orcid.org/0000-0003-3329-2305"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vinodkumar Prabhakaran","raw_affiliation_strings":["Google Research, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5071599724"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.9267,"has_fulltext":true,"cited_by_count":44,"citation_normalized_percentile":{"value":0.93345324,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1859","last_page":"1876"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7090942859649658},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6341763138771057},{"id":"https://openalex.org/keywords/judgement","display_name":"Judgement","score":0.6247879862785339},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6123574376106262},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5821173191070557},{"id":"https://openalex.org/keywords/normative","display_name":"Normative","score":0.535203754901886},{"id":"https://openalex.org/keywords/equivalence","display_name":"Equivalence (formal languages)","score":0.48603326082229614},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4809774160385132},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4790973961353302},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.46609604358673096},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.42633092403411865},{"id":"https://openalex.org/keywords/management-science","display_name":"Management science","score":0.3960905373096466},{"id":"https://openalex.org/keywords/epistemology","display_name":"Epistemology","score":0.11540365219116211},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1021277904510498}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7090942859649658},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6341763138771057},{"id":"https://openalex.org/C2776548248","wikidata":"https://www.wikidata.org/wiki/Q12621536","display_name":"Judgement","level":2,"score":0.6247879862785339},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6123574376106262},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5821173191070557},{"id":"https://openalex.org/C44725695","wikidata":"https://www.wikidata.org/wiki/Q288156","display_name":"Normative","level":2,"score":0.535203754901886},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.48603326082229614},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4809774160385132},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4790973961353302},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.46609604358673096},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.42633092403411865},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3960905373096466},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.11540365219116211},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1021277904510498},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3531146.3533233","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3531146.3533233","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3531146.3533233","source":{"id":"https://openalex.org/S4363608463","display_name":"2022 ACM Conference on Fairness, Accountability, and Transparency","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 ACM Conference on Fairness Accountability and Transparency","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3531146.3533233","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3531146.3533233","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3531146.3533233","source":{"id":"https://openalex.org/S4363608463","display_name":"2022 ACM Conference on Fairness, Accountability, and Transparency","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 ACM Conference on Fairness Accountability and Transparency","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4280621405.pdf","grobid_xml":"https://content.openalex.org/works/W4280621405.grobid-xml"},"referenced_works_count":123,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W1607035479","https://openalex.org/W1607519102","https://openalex.org/W1962580118","https://openalex.org/W1978743011","https://openalex.org/W1994588695","https://openalex.org/W2008525336","https://openalex.org/W2040467573","https://openalex.org/W2091632169","https://openalex.org/W2092993939","https://openalex.org/W2104597806","https://openalex.org/W2118882740","https://openalex.org/W2129360853","https://openalex.org/W2144578941","https://openalex.org/W2152417223","https://openalex.org/W2194775991","https://openalex.org/W2318996388","https://openalex.org/W2511234952","https://openalex.org/W2531638282","https://openalex.org/W2563153106","https://openalex.org/W2563852449","https://openalex.org/W2584805976","https://openalex.org/W2603258515","https://openalex.org/W2619871744","https://openalex.org/W2624317482","https://openalex.org/W2626039843","https://openalex.org/W2773907782","https://openalex.org/W2775988183","https://openalex.org/W2782864149","https://openalex.org/W2788083170","https://openalex.org/W2788969155","https://openalex.org/W2795435272","https://openalex.org/W2802163129","https://openalex.org/W2810956809","https://openalex.org/W2884074583","https://openalex.org/W2889169527","https://openalex.org/W2897042519","https://openalex.org/W2897154134","https://openalex.org/W2901856814","https://openalex.org/W2906503686","https://openalex.org/W2910707576","https://openalex.org/W2911227954","https://openalex.org/W2920807444","https://openalex.org/W2923014074","https://openalex.org/W2940105172","https://openalex.org/W2957654274","https://openalex.org/W2958293758","https://openalex.org/W2962833164","https://openalex.org/W2963261224","https://openalex.org/W2963293463","https://openalex.org/W2963403405","https://openalex.org/W2963526187","https://openalex.org/W2963612171","https://openalex.org/W2963809228","https://openalex.org/W2969580338","https://openalex.org/W2972684672","https://openalex.org/W2972708930","https://openalex.org/W2986189785","https://openalex.org/W2989344603","https://openalex.org/W2990627857","https://openalex.org/W2991202319","https://openalex.org/W2995006168","https://openalex.org/W2996355630","https://openalex.org/W2997829256","https://openalex.org/W3001807593","https://openalex.org/W3005957694","https://openalex.org/W3007157104","https://openalex.org/W3010140118","https://openalex.org/W3013937328","https://openalex.org/W3015001695","https://openalex.org/W3019130148","https://openalex.org/W3020348090","https://openalex.org/W3023698061","https://openalex.org/W3035400430","https://openalex.org/W3035507081","https://openalex.org/W3045634646","https://openalex.org/W3087231533","https://openalex.org/W3089039457","https://openalex.org/W3092609815","https://openalex.org/W3094376731","https://openalex.org/W3098988204","https://openalex.org/W3100279624","https://openalex.org/W3101767999","https://openalex.org/W3104406626","https://openalex.org/W3104888566","https://openalex.org/W3105424285","https://openalex.org/W3112689365","https://openalex.org/W3122002830","https://openalex.org/W3133631714","https://openalex.org/W3133702157","https://openalex.org/W3133874049","https://openalex.org/W3135371071","https://openalex.org/W3135514117","https://openalex.org/W3135773605","https://openalex.org/W3137220143","https://openalex.org/W3152436735","https://openalex.org/W3166716308","https://openalex.org/W3168194750","https://openalex.org/W3168461525","https://openalex.org/W3172514680","https://openalex.org/W3174220540","https://openalex.org/W3174783113","https://openalex.org/W3175471114","https://openalex.org/W3175982906","https://openalex.org/W3176707157","https://openalex.org/W3184924454","https://openalex.org/W3189849087","https://openalex.org/W3189951784","https://openalex.org/W3198690080","https://openalex.org/W3198708518","https://openalex.org/W3206428286","https://openalex.org/W3206484473","https://openalex.org/W3207830467","https://openalex.org/W3212368439","https://openalex.org/W3214105842","https://openalex.org/W3214897310","https://openalex.org/W4206558204","https://openalex.org/W4226099958","https://openalex.org/W4230074614","https://openalex.org/W4238846128","https://openalex.org/W4288359825","https://openalex.org/W4288617757","https://openalex.org/W4310492983"],"related_works":["https://openalex.org/W2886802431","https://openalex.org/W4388216822","https://openalex.org/W1761762290","https://openalex.org/W4391191813","https://openalex.org/W4387575966","https://openalex.org/W2384262901","https://openalex.org/W2975214487","https://openalex.org/W651304006","https://openalex.org/W2345720417","https://openalex.org/W2385564112"],"abstract_inverted_index":{"Forming":[0],"a":[1,5,24,45,93,97,151],"reliable":[2],"judgement":[3],"of":[4,27,38,48,61,69,76,99,120,150,153,169,174,182,214],"machine":[6,145],"learning":[7,146],"(ML)":[8],"model\u2019s":[9],"appropriateness":[10,196],"for":[11,17,197,209],"an":[12,73],"application":[13],"ecosystem":[14],"is":[15],"critical":[16],"its":[18],"responsible":[19],"use,":[20],"and":[21,32,64,86,106,179],"requires":[22],"considering":[23,103],"broad":[25],"range":[26,47,152],"factors":[28],"including":[29],"harms,":[30],"benefits,":[31],"responsibilities.":[33],"In":[34],"practice,":[35],"however,":[36],"evaluations":[37],"ML":[39,198,215],"models":[40,121],"frequently":[41,132],"focus":[42,68,95],"on":[43,96,188],"only":[44],"narrow":[46,67],"decontextualized":[49],"predictive":[50],"behaviours.":[51],"We":[52],"examine":[53],"the":[54,58,65,83,104,125,128,144,167,171,180,202,212],"evaluation":[55,62,100,207],"gaps":[56],"between":[57],"idealized":[59],"breadth":[60],"concerns":[63],"observed":[66],"actual":[70],"evaluations.":[71],"Through":[72],"empirical":[74],"study":[75],"papers":[77],"from":[78,165],"recent":[79],"high-profile":[80],"conferences":[81],"in":[82,111,124,177],"Computer":[84],"Vision":[85],"Natural":[87],"Language":[88],"Processing":[89],"communities,":[90],"we":[91,114,142],"demonstrate":[92,143],"general":[94],"handful":[98],"methods.":[101],"By":[102,138],"metrics":[105],"test":[107],"data":[108],"distributions":[109],"used":[110],"these":[112,140,159,189],"methods,":[113],"draw":[115],"attention":[116],"to":[117,162,193],"which":[118,155],"properties":[119,129],"are":[122,131],"centered":[123],"field,":[126],"revealing":[127],"that":[130],"neglected":[133],"or":[134],"sidelined":[135],"during":[136],"evaluation.":[137],"studying":[139],"properties,":[141],"discipline\u2019s":[147],"implicit":[148],"assumption":[149],"commitments":[154,161],"have":[156],"normative":[157],"impacts;":[158],"include":[160],"consequentialism,":[163],"abstractability":[164],"context,":[166],"quantifiability":[168],"impacts,":[170],"limited":[172],"role":[173],"model":[175],"inputs":[176],"evaluation,":[178],"equivalence":[181],"different":[183],"failure":[184],"modes.":[185],"Shedding":[186],"light":[187],"assumptions":[190],"enables":[191],"us":[192],"question":[194],"their":[195],"system":[199],"contexts,":[200],"pointing":[201],"way":[203],"towards":[204],"more":[205],"contextualized":[206],"methodologies":[208],"robustly":[210],"examining":[211],"trustworthiness":[213],"models.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":3}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
