{"id":"https://openalex.org/W3161776805","doi":"https://doi.org/10.1145/3411763.3451617","title":"What Makes a Well-Documented Notebook? A Case Study of Data Scientists\u2019 Documentation Practices in Kaggle","display_name":"What Makes a Well-Documented Notebook? A Case Study of Data Scientists\u2019 Documentation Practices in Kaggle","publication_year":2021,"publication_date":"2021-05-08","ids":{"openalex":"https://openalex.org/W3161776805","doi":"https://doi.org/10.1145/3411763.3451617","mag":"3161776805"},"language":"en","primary_location":{"id":"doi:10.1145/3411763.3451617","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3411763.3451617","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Extended Abstracts of the 2021 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046673805","display_name":"April Yi Wang","orcid":"https://orcid.org/0000-0001-8724-4662"},"institutions":[{"id":"https://openalex.org/I4210111179","display_name":"Michigan United","ror":"https://ror.org/0291ys696","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210111179"]},{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"April Yi Wang","raw_affiliation_strings":["School of Information University of Michigan, United States"],"affiliations":[{"raw_affiliation_string":"School of Information University of Michigan, United States","institution_ids":["https://openalex.org/I27837315","https://openalex.org/I4210111179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062817658","display_name":"Dakuo Wang","orcid":"https://orcid.org/0000-0001-9371-9441"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dakuo Wang","raw_affiliation_strings":["IBM Research, United States"],"affiliations":[{"raw_affiliation_string":"IBM Research, United States","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031289715","display_name":"Jaimie Drozdal","orcid":"https://orcid.org/0000-0002-7153-9068"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaimie Drozdal","raw_affiliation_strings":["Cognitive and Immersive Systems Lab Rensselaer Polytechnic Institute, United States"],"affiliations":[{"raw_affiliation_string":"Cognitive and Immersive Systems Lab Rensselaer Polytechnic Institute, United States","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012411636","display_name":"Xuye Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xuye Liu","raw_affiliation_strings":["Rensselaer Polytechnic Institute, United States"],"affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute, United States","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075027005","display_name":"Soya Park","orcid":"https://orcid.org/0000-0002-2149-4420"},"institutions":[{"id":"https://openalex.org/I126820664","display_name":"Vassar College","ror":"https://ror.org/022x6qg61","country_code":"US","type":"education","lineage":["https://openalex.org/I126820664"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Soya Park","raw_affiliation_strings":["CSAIL MIT, United States"],"affiliations":[{"raw_affiliation_string":"CSAIL MIT, United States","institution_ids":["https://openalex.org/I126820664"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069296306","display_name":"Steve Oney","orcid":null},"institutions":[{"id":"https://openalex.org/I4210111179","display_name":"Michigan United","ror":"https://ror.org/0291ys696","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210111179"]},{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steve Oney","raw_affiliation_strings":["School of Information University of Michigan, United States"],"affiliations":[{"raw_affiliation_string":"School of Information University of Michigan, United States","institution_ids":["https://openalex.org/I27837315","https://openalex.org/I4210111179"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081802677","display_name":"Christopher Brooks","orcid":"https://orcid.org/0000-0003-0875-0204"},"institutions":[{"id":"https://openalex.org/I4210111179","display_name":"Michigan United","ror":"https://ror.org/0291ys696","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210111179"]},{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Brooks","raw_affiliation_strings":["School of Information University of Michigan, United States"],"affiliations":[{"raw_affiliation_string":"School of Information University of Michigan, United States","institution_ids":["https://openalex.org/I27837315","https://openalex.org/I4210111179"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5046673805"],"corresponding_institution_ids":["https://openalex.org/I27837315","https://openalex.org/I4210111179"],"apc_list":null,"apc_paid":null,"fwci":5.7837,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.96108925,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.953335165977478},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7731516361236572},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.5989627242088318},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5974048972129822},{"id":"https://openalex.org/keywords/proxy","display_name":"Proxy (statistics)","score":0.5734308958053589},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4931144416332245},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.4375397264957428},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3878024220466614},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14712899923324585},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09212848544120789},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.09164375066757202}],"concepts":[{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.953335165977478},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7731516361236572},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.5989627242088318},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5974048972129822},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.5734308958053589},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4931144416332245},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.4375397264957428},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3878024220466614},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14712899923324585},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09212848544120789},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.09164375066757202},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3411763.3451617","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3411763.3451617","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Extended Abstracts of the 2021 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2123333323","https://openalex.org/W2133890582","https://openalex.org/W2148469619","https://openalex.org/W2163745066","https://openalex.org/W2168382229","https://openalex.org/W2771189628","https://openalex.org/W2784241156","https://openalex.org/W2796040126","https://openalex.org/W2887274410","https://openalex.org/W2898671507","https://openalex.org/W2941766203","https://openalex.org/W2951102128","https://openalex.org/W2964194820","https://openalex.org/W2968484890","https://openalex.org/W2995332686","https://openalex.org/W3012094108","https://openalex.org/W3029504795","https://openalex.org/W3042955694","https://openalex.org/W3093839314","https://openalex.org/W3119940315","https://openalex.org/W3120370895","https://openalex.org/W3122753382","https://openalex.org/W3150176278","https://openalex.org/W3162176464","https://openalex.org/W4288086169","https://openalex.org/W4289219086"],"related_works":["https://openalex.org/W2618286804","https://openalex.org/W2329643025","https://openalex.org/W2002770077","https://openalex.org/W3131163342","https://openalex.org/W2092256833","https://openalex.org/W2142369114","https://openalex.org/W2361728394","https://openalex.org/W2166271660","https://openalex.org/W2352631095","https://openalex.org/W3038222916"],"abstract_inverted_index":{"Many":[0],"data":[1,41,59,79,136,154],"scientists":[2,60,80,137],"use":[3],"computational":[4],"notebooks":[5,55,94,117],"to":[6,35,61,81,124],"test":[7],"and":[8,18,23,83,90,110,143,164],"present":[9],"their":[10,140],"work,":[11],"as":[12,56,95],"a":[13,40,48,51,74,96,102],"notebook":[14,108],"can":[15],"weave":[16],"code":[17,28],"documentation":[19,38,141,145],"together":[20],"(computational":[21],"narrative),":[22],"support":[24],"rapid":[25],"iteration":[26],"on":[27],"experiments.":[29],"However,":[30],"it":[31],"is":[32,47],"not":[33],"easy":[34],"write":[36,138],"good":[37],"in":[39,85,122,139],"science":[42,155],"notebook,":[43],"partially":[44],"because":[45],"there":[46],"lack":[49],"of":[50,53,133,152],"corpus":[52],"well-documented":[54,99],"exemplars":[57],"for":[58,78,98],"follow.":[62],"To":[63],"cope":[64],"with":[65,149,161],"this":[66,68],"challenge,":[67],"work":[69],"looks":[70],"at":[71,105],"Kaggle":[72,93],"\u2014":[73,89],"large":[75],"online":[76],"community":[77],"host":[82],"participate":[84],"machine":[86],"learning":[87],"competitions":[88],"considers":[91],"highly-voted":[92],"proxy":[97],"notebooks.":[100],"Through":[101],"qualitative":[103],"analysis":[104,128],"both":[106],"the":[107,111,153,159],"level":[109],"markdown-cell":[112],"level,":[113],"we":[114],"find":[115],"these":[116,144],"are":[118],"indeed":[119],"well":[120],"documented":[121],"reference":[123],"previous":[125],"literature.":[126],"Our":[127],"also":[129],"reveals":[130],"nine":[131],"categories":[132],"content":[134],"that":[135],"cells,":[142],"cells":[146],"often":[147],"interplay":[148],"different":[150],"stages":[151],"lifecycle.":[156],"We":[157],"conclude":[158],"paper":[160],"design":[162],"implications":[163],"future":[165],"research":[166],"directions.":[167]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
