{"id":"https://openalex.org/W4308784155","doi":"https://doi.org/10.1145/3555760","title":"Understanding Machine Learning Practitioners' Data Documentation Perceptions, Needs, Challenges, and Desiderata","display_name":"Understanding Machine Learning Practitioners' Data Documentation Perceptions, Needs, Challenges, and Desiderata","publication_year":2022,"publication_date":"2022-11-07","ids":{"openalex":"https://openalex.org/W4308784155","doi":"https://doi.org/10.1145/3555760"},"language":"en","primary_location":{"id":"doi:10.1145/3555760","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3555760","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3555760&file=v6cscw2340aux.pdf","source":{"id":"https://openalex.org/S4210183893","display_name":"Proceedings of the ACM on Human-Computer Interaction","issn_l":"2573-0142","issn":["2573-0142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Human-Computer Interaction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3555760&file=v6cscw2340aux.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077430492","display_name":"Amy Heger","orcid":"https://orcid.org/0000-0002-0822-6595"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Amy K. Heger","raw_affiliation_strings":["Microsoft, St. Louis, MO, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, St. Louis, MO, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073683081","display_name":"Liz B. Marquis","orcid":"https://orcid.org/0000-0002-8753-9038"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liz B. Marquis","raw_affiliation_strings":["University of Michigan, Ann Arbor, MI, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066017612","display_name":"Mihaela Vorvoreanu","orcid":"https://orcid.org/0000-0002-3322-3548"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mihaela Vorvoreanu","raw_affiliation_strings":["Microsoft, Redmond, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046348432","display_name":"Hanna Wallach","orcid":"https://orcid.org/0000-0003-3395-7186"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanna Wallach","raw_affiliation_strings":["Microsoft, New York City, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, New York City, NY, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043117896","display_name":"Jennifer Wortman Vaughan","orcid":"https://orcid.org/0000-0002-7807-2018"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jennifer Wortman Vaughan","raw_affiliation_strings":["Microsoft, New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, New York, NY, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5077430492"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":9.3222,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.9819599,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"6","issue":"CSCW2","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9624999761581421,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.9523098468780518},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6787997484207153},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6234346628189087},{"id":"https://openalex.org/keywords/transparency","display_name":"Transparency (behavior)","score":0.5054969787597656},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4825019836425781},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.439251184463501},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.43852898478507996},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.42589622735977173},{"id":"https://openalex.org/keywords/technical-documentation","display_name":"Technical documentation","score":0.4258894622325897},{"id":"https://openalex.org/keywords/internal-documentation","display_name":"Internal documentation","score":0.41716018319129944},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.23335611820220947},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.13429683446884155}],"concepts":[{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.9523098468780518},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6787997484207153},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6234346628189087},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.5054969787597656},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4825019836425781},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.439251184463501},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.43852898478507996},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.42589622735977173},{"id":"https://openalex.org/C25688753","wikidata":"https://www.wikidata.org/wiki/Q1413406","display_name":"Technical documentation","level":3,"score":0.4258894622325897},{"id":"https://openalex.org/C140396857","wikidata":"https://www.wikidata.org/wiki/Q16934771","display_name":"Internal documentation","level":5,"score":0.41716018319129944},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.23335611820220947},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.13429683446884155},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.0},{"id":"https://openalex.org/C186846655","wikidata":"https://www.wikidata.org/wiki/Q3398377","display_name":"Software construction","level":4,"score":0.0},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3555760","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3555760","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3555760&file=v6cscw2340aux.pdf","source":{"id":"https://openalex.org/S4210183893","display_name":"Proceedings of the ACM on Human-Computer Interaction","issn_l":"2573-0142","issn":["2573-0142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Human-Computer Interaction","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3555760","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3555760","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3555760&file=v6cscw2340aux.pdf","source":{"id":"https://openalex.org/S4210183893","display_name":"Proceedings of the ACM on Human-Computer Interaction","issn_l":"2573-0142","issn":["2573-0142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Human-Computer Interaction","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4308784155.pdf","grobid_xml":"https://content.openalex.org/works/W4308784155.grobid-xml"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W1684809301","https://openalex.org/W1962580118","https://openalex.org/W1979290264","https://openalex.org/W2054221741","https://openalex.org/W2073568224","https://openalex.org/W2165254944","https://openalex.org/W2289713250","https://openalex.org/W2396614874","https://openalex.org/W2529670628","https://openalex.org/W2771189628","https://openalex.org/W2786242872","https://openalex.org/W2788969155","https://openalex.org/W2811374795","https://openalex.org/W2897042519","https://openalex.org/W2911227954","https://openalex.org/W2922234936","https://openalex.org/W2941486823","https://openalex.org/W2941766203","https://openalex.org/W2953522645","https://openalex.org/W2971867419","https://openalex.org/W2974071289","https://openalex.org/W2976948333","https://openalex.org/W2982521860","https://openalex.org/W2983531573","https://openalex.org/W2985543011","https://openalex.org/W2989342034","https://openalex.org/W3000912728","https://openalex.org/W3004528080","https://openalex.org/W3014972121","https://openalex.org/W3016970897","https://openalex.org/W3029504795","https://openalex.org/W3032086959","https://openalex.org/W3034515982","https://openalex.org/W3093541323","https://openalex.org/W3100046612","https://openalex.org/W3100279624","https://openalex.org/W3103934428","https://openalex.org/W3118841180","https://openalex.org/W3125041301","https://openalex.org/W3125798375","https://openalex.org/W3127081983","https://openalex.org/W3133631714","https://openalex.org/W3135371071","https://openalex.org/W3158479996","https://openalex.org/W3184924454","https://openalex.org/W3206271843","https://openalex.org/W3212368439","https://openalex.org/W4248077027","https://openalex.org/W4288083705","https://openalex.org/W4288359825"],"related_works":["https://openalex.org/W31410542","https://openalex.org/W2024831220","https://openalex.org/W4231015519","https://openalex.org/W1780032534","https://openalex.org/W2077003014","https://openalex.org/W2059763708","https://openalex.org/W2061757721","https://openalex.org/W1993376192","https://openalex.org/W48661636","https://openalex.org/W2903754881"],"abstract_inverted_index":{"Data":[0],"is":[1,72],"central":[2],"to":[3,57,100,163,181,184,227,257,302],"the":[4,14,26,45,82,112,199,209,218,221,241,280],"development":[5],"and":[6,42,53,63,90,109,170,192,194,229,246,288,305,312],"evaluation":[7],"of":[8,16,84,115,130,150,211,243,282],"machine":[9],"learning":[10],"(ML)":[11],"models.":[12],"However,":[13,70],"use":[15],"problematic":[17],"or":[18],"inappropriate":[19],"datasets":[20,41,254,283],"can":[21,120],"result":[22,285],"in":[23,172,286],"harms":[24,287,291],"when":[25],"resulting":[27],"models":[28],"are":[29,50,166,205],"deployed.":[30],"To":[31,93],"encourage":[32],"responsible":[33,212,231],"AI":[34,232],"practice":[35],"through":[36],"more":[37,275,295],"deliberate":[38],"reflection":[39],"on":[40,75,260,278],"transparency":[43],"around":[44],"processes":[46],"by":[47],"which":[48],"they":[49,224],"created,":[51],"researchers":[52],"practitioners":[54,136],"have":[55,64],"begun":[56],"advocate":[58],"for":[59,155,177,268,298],"increased":[60],"data":[61,67,78,104,123,164,178,202,270],"documentation":[62,68,79,105,124,165,179,203,271],"proposed":[65],"several":[66],"frameworks.":[69,125],"there":[71],"little":[73],"research":[74],"whether":[76],"these":[77,261,290],"frameworks":[80,180,204,272],"meet":[81],"needs":[83,176,242],"ML":[85,102,135,308],"practitioners,":[86],"who":[87],"both":[88],"create":[89],"consume":[91],"datasets.":[92],"address":[94],"this":[95],"gap,":[96],"we":[97,263],"set":[98],"out":[99],"understand":[101],"practitioners'":[103,309],"perceptions,":[106],"needs,":[107],"challenges,":[108],"desiderata,":[110],"with":[111,133,252],"ultimate":[113],"goal":[114],"deriving":[116],"design":[117,266],"requirements":[118,267],"that":[119,160,201,223,249],"inform":[121],"future":[122,269],"We":[126,144],"conducted":[127],"a":[128,138,148],"series":[129],"semi-structured":[131],"interviews":[132],"14":[134],"at":[137],"single":[139],"large,":[140],"international":[141],"technology":[142],"company.":[143],"had":[145,238],"them":[146],"answer":[147,228],"list":[149],"questions":[151,222],"taken":[152],"from":[153,208],"datasheets":[154],"datasets~\\citegebru2018datasheets.":[156],"Our":[157],"findings":[158],"show":[159],"current":[161],"approaches":[162],"largely":[167],"ad":[168],"hoc":[169],"myopic":[171],"nature.":[173],"Participants":[174],"expressed":[175],"be":[182,293],"adaptable":[183],"their":[185,189,230,253],"contexts,":[186,304],"integrated":[187],"into":[188,307],"existing":[190,310],"tools":[191,311],"workflows,":[193],"automated":[195,300],"wherever":[196],"possible.":[197],"Despite":[198],"fact":[200],"often":[206,237],"motivated":[207],"perspective":[210],"AI,":[213],"participants":[214,236],"did":[215],"not":[216],"make":[217],"connection":[219],"between":[220],"were":[225],"asked":[226],"implications.":[233],"In":[234],"addition,":[235],"difficulties":[239],"prioritizing":[240],"dataset":[244],"consumers":[245],"providing":[247],"information":[248],"someone":[250],"unfamiliar":[251],"might":[255,284,292],"need":[256],"know.":[258],"Based":[259],"findings,":[262],"derive":[264],"seven":[265],"such":[273],"as":[274],"actionable":[276],"guidance":[277],"how":[279,289],"characteristics":[281],"mitigated,":[294],"explicit":[296],"prompts":[297],"reflection,":[299],"adaptation":[301],"different":[303],"integration":[306],"workflows.":[313]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
