{"id":"https://openalex.org/W2996426982","doi":"https://doi.org/10.1145/3351095.3372862","title":"Garbage in, garbage out?","display_name":"Garbage in, garbage out?","publication_year":2020,"publication_date":"2020-01-27","ids":{"openalex":"https://openalex.org/W2996426982","doi":"https://doi.org/10.1145/3351095.3372862","mag":"2996426982"},"language":"en","primary_location":{"id":"doi:10.1145/3351095.3372862","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3351095.3372862","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3351095.3372862&file=p325-geiger-supp.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3351095.3372862&file=p325-geiger-supp.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088070105","display_name":"R. Stuart Geiger","orcid":"https://orcid.org/0000-0001-7215-0532"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"R. Stuart Geiger","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101719187","display_name":"Kevin Yu","orcid":"https://orcid.org/0000-0001-9224-3891"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Yu","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020206254","display_name":"Yanlai Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanlai Yang","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034752394","display_name":"Mindy Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mindy Dai","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038029456","display_name":"Jie Qiu","orcid":"https://orcid.org/0000-0003-0582-1767"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jie Qiu","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085004332","display_name":"Rebekah Tang","orcid":"https://orcid.org/0000-0003-4563-5192"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rebekah Tang","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023727242","display_name":"Jenny Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jenny Huang","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5088070105"],"corresponding_institution_ids":["https://openalex.org/I2803209242"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01417529,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"325","last_page":"336"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7845467329025269},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7827039957046509},{"id":"https://openalex.org/keywords/garbage","display_name":"Garbage","score":0.6610039472579956},{"id":"https://openalex.org/keywords/compensation","display_name":"Compensation (psychology)","score":0.6049107313156128},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.5676852464675903},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5638201236724854},{"id":"https://openalex.org/keywords/data-reliability","display_name":"Data reliability","score":0.5374454259872437},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5349287986755371},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4713420569896698},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46646225452423096},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.451806902885437},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.40724706649780273},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.19509819149971008},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09326982498168945}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7845467329025269},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7827039957046509},{"id":"https://openalex.org/C75403996","wikidata":"https://www.wikidata.org/wiki/Q5521979","display_name":"Garbage","level":2,"score":0.6610039472579956},{"id":"https://openalex.org/C2780023022","wikidata":"https://www.wikidata.org/wiki/Q1338171","display_name":"Compensation (psychology)","level":2,"score":0.6049107313156128},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.5676852464675903},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5638201236724854},{"id":"https://openalex.org/C2988402297","wikidata":"https://www.wikidata.org/wiki/Q65558817","display_name":"Data reliability","level":2,"score":0.5374454259872437},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5349287986755371},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4713420569896698},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46646225452423096},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.451806902885437},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.40724706649780273},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.19509819149971008},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09326982498168945},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C11171543","wikidata":"https://www.wikidata.org/wiki/Q41630","display_name":"Psychoanalysis","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3351095.3372862","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3351095.3372862","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3351095.3372862&file=p325-geiger-supp.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency","raw_type":"proceedings-article"},{"id":"mag:2996426982","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.1145/3351095.3372862","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3351095.3372862","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3351095.3372862&file=p325-geiger-supp.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G662344573","display_name":null,"funder_award_id":"GBMF3834","funder_id":"https://openalex.org/F4320306202","funder_display_name":"Gordon and Betty Moore Foundation"},{"id":"https://openalex.org/G955715661","display_name":null,"funder_award_id":"2013-10-27","funder_id":"https://openalex.org/F4320306151","funder_display_name":"Alfred P. Sloan Foundation"}],"funders":[{"id":"https://openalex.org/F4320306151","display_name":"Alfred P. Sloan Foundation","ror":"https://ror.org/052csg198"},{"id":"https://openalex.org/F4320306202","display_name":"Gordon and Betty Moore Foundation","ror":"https://ror.org/006wxqw41"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2996426982.pdf","grobid_xml":"https://content.openalex.org/works/W2996426982.grobid-xml"},"referenced_works_count":64,"referenced_works":["https://openalex.org/W107138002","https://openalex.org/W172170795","https://openalex.org/W202799293","https://openalex.org/W1522019205","https://openalex.org/W1554944419","https://openalex.org/W1563918824","https://openalex.org/W1968993977","https://openalex.org/W1991327380","https://openalex.org/W2011301426","https://openalex.org/W2022710553","https://openalex.org/W2022913092","https://openalex.org/W2050793659","https://openalex.org/W2056973989","https://openalex.org/W2061939373","https://openalex.org/W2062172061","https://openalex.org/W2068743873","https://openalex.org/W2119830539","https://openalex.org/W2120824164","https://openalex.org/W2138664283","https://openalex.org/W2146292423","https://openalex.org/W2163284576","https://openalex.org/W2278629362","https://openalex.org/W2293246850","https://openalex.org/W2295675274","https://openalex.org/W2329395632","https://openalex.org/W2342249984","https://openalex.org/W2437990191","https://openalex.org/W2468363730","https://openalex.org/W2472803348","https://openalex.org/W2487770199","https://openalex.org/W2496371950","https://openalex.org/W2557283755","https://openalex.org/W2607311634","https://openalex.org/W2769563773","https://openalex.org/W2770372582","https://openalex.org/W2788969155","https://openalex.org/W2795038878","https://openalex.org/W2796571581","https://openalex.org/W2802642435","https://openalex.org/W2886900553","https://openalex.org/W2888551908","https://openalex.org/W2889249015","https://openalex.org/W2896833840","https://openalex.org/W2897042519","https://openalex.org/W2906183835","https://openalex.org/W2914228034","https://openalex.org/W2939930770","https://openalex.org/W2944010778","https://openalex.org/W2948426604","https://openalex.org/W2963508088","https://openalex.org/W2994816012","https://openalex.org/W3017863658","https://openalex.org/W3094328607","https://openalex.org/W3100279624","https://openalex.org/W3110197861","https://openalex.org/W3122548859","https://openalex.org/W3125261728","https://openalex.org/W3133874049","https://openalex.org/W3154739585","https://openalex.org/W3157172840","https://openalex.org/W4233093336","https://openalex.org/W4244021162","https://openalex.org/W4252535617","https://openalex.org/W6969145360"],"related_works":["https://openalex.org/W7655147","https://openalex.org/W149980","https://openalex.org/W1614943","https://openalex.org/W8136493","https://openalex.org/W11991885","https://openalex.org/W13607926","https://openalex.org/W6161656","https://openalex.org/W10648265","https://openalex.org/W14204599","https://openalex.org/W1750062"],"abstract_inverted_index":{"Many":[0],"machine":[1,71,185],"learning":[2,72,186],"projects":[3],"for":[4,15,158],"new":[5],"application":[6,73],"areas":[7],"involve":[8],"teams":[9],"of":[10,70,110,114,148,184,199,212],"humans":[11],"who":[12,125],"label":[13],"data":[14,28,93,166,201,215],"a":[16,31,38,46,68,172,196],"particular":[17],"purpose,":[18],"from":[19,81],"hiring":[20],"crowdworkers":[21,159],"to":[22,36,65,154],"the":[23,27,50,122,126,137,164,209,219],"paper's":[24],"authors":[25],"labeling":[26],"themselves.":[29],"Such":[30],"task":[32,90],"is":[33,45,160,167,193,202,216],"quite":[34],"similar":[35],"(or":[37],"form":[39],"of)":[40],"structured":[41,111],"content":[42,112],"analysis,":[43],"which":[44],"longstanding":[47],"methodology":[48],"in":[49,75,175,218],"social":[51,76],"sciences":[52],"and":[53,83,162,181,188],"humanities,":[54],"with":[55],"many":[56],"established":[57],"best":[58,101],"practices.":[59],"In":[60],"this":[61],"paper,":[62,116],"we":[63,205],"investigate":[64],"what":[66,129,146,192],"extent":[67],"sample":[69],"papers":[74,80],"computing":[77],"---":[78,94],"specifically":[79],"ArXiv":[82],"traditional":[84],"publications":[85],"performing":[86],"an":[87],"ML":[88],"classification":[89],"on":[91,191],"Twitter":[92],"give":[95],"specific":[96],"details":[97],"about":[98],"whether":[99,133,140,156,176,213],"such":[100,119,177,214],"practices":[102,178],"were":[103,144,152,179],"followed.":[104],"Our":[105],"team":[106],"conducted":[107],"multiple":[108],"rounds":[109],"analysis":[113],"each":[115],"making":[117],"determinations":[118],"as:":[120],"Does":[121],"paper":[123],"report":[124],"labelers":[127],"were,":[128,132],"their":[130],"qualifications":[131],"they":[134],"independently":[135],"labeled":[136],"same":[138],"items,":[139],"inter-rater":[141],"reliability":[142],"metrics":[143],"disclosed,":[145,161],"level":[147],"training":[149,165,200],"and/or":[150],"instructions":[151],"given":[153],"labelers,":[155],"compensation":[157],"if":[163],"publicly":[168],"available.":[169],"We":[170],"find":[171],"wide":[173],"divergence":[174],"followed":[180],"documented.":[182],"Much":[183],"research":[187],"education":[189],"focuses":[190],"done":[194],"once":[195],"\"gold":[197],"standard\"":[198],"available,":[203],"but":[204],"discuss":[206],"issues":[207],"around":[208],"equally-important":[210],"aspect":[211],"reliable":[217],"first":[220],"place.":[221]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
