{"id":"https://openalex.org/W2125943921","doi":"https://doi.org/10.1145/1401890.1401965","title":"Get another label? improving data quality and data mining using multiple, noisy labelers","display_name":"Get another label? improving data quality and data mining using multiple, noisy labelers","publication_year":2008,"publication_date":"2008-08-24","ids":{"openalex":"https://openalex.org/W2125943921","doi":"https://doi.org/10.1145/1401890.1401965","mag":"2125943921"},"language":"en","primary_location":{"id":"doi:10.1145/1401890.1401965","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1401890.1401965","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051706630","display_name":"Victor S. Sheng","orcid":"https://orcid.org/0000-0003-4960-174X"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Victor S. Sheng","raw_affiliation_strings":["Leonard N. Stern School of Business, New York University, New York, NY, USA","[Leonard N. Stern School of Business, New York University, New York, NY, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Leonard N. Stern School of Business, New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"[Leonard N. Stern School of Business, New York University, New York, NY, USA]","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037283651","display_name":"Foster Provost","orcid":"https://orcid.org/0000-0002-0307-3884"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Foster Provost","raw_affiliation_strings":["Leonard N. Stern School of Business, New York University, New York, NY, USA","[Leonard N. Stern School of Business, New York University, New York, NY, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Leonard N. Stern School of Business, New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"[Leonard N. Stern School of Business, New York University, New York, NY, USA]","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010731709","display_name":"Panagiotis G. Ipeirotis","orcid":"https://orcid.org/0000-0002-2966-7402"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Panagiotis G. Ipeirotis","raw_affiliation_strings":["Leonard N. Stern School of Business, New York University, New York, NY, USA","[Leonard N. Stern School of Business, New York University, New York, NY, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Leonard N. Stern School of Business, New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"[Leonard N. Stern School of Business, New York University, New York, NY, USA]","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5051706630"],"corresponding_institution_ids":["https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":81.6676,"has_fulltext":false,"cited_by_count":1108,"citation_normalized_percentile":{"value":0.99962144,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"614","last_page":"622"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7907192707061768},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.626776933670044},{"id":"https://openalex.org/keywords/sequence-labeling","display_name":"Sequence labeling","score":0.6016872525215149},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5392962098121643},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.5329424142837524},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5289796590805054},{"id":"https://openalex.org/keywords/imperfect","display_name":"Imperfect","score":0.5237032175064087},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5149908661842346},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.48219791054725647},{"id":"https://openalex.org/keywords/outsourcing","display_name":"Outsourcing","score":0.4520048499107361},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.447075754404068},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.43509992957115173},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3886175751686096},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.09964638948440552},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08202710747718811}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7907192707061768},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.626776933670044},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.6016872525215149},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5392962098121643},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.5329424142837524},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5289796590805054},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.5237032175064087},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5149908661842346},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48219791054725647},{"id":"https://openalex.org/C46934059","wikidata":"https://www.wikidata.org/wiki/Q61515","display_name":"Outsourcing","level":2,"score":0.4520048499107361},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.447075754404068},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.43509992957115173},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3886175751686096},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.09964638948440552},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08202710747718811},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1401890.1401965","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1401890.1401965","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.44999998807907104}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W9014458","https://openalex.org/W167016754","https://openalex.org/W1504694836","https://openalex.org/W1553533669","https://openalex.org/W1562286606","https://openalex.org/W1562418542","https://openalex.org/W1570448133","https://openalex.org/W1819386543","https://openalex.org/W1984712439","https://openalex.org/W2024659816","https://openalex.org/W2026889607","https://openalex.org/W2027819915","https://openalex.org/W2045656233","https://openalex.org/W2058732827","https://openalex.org/W2083856558","https://openalex.org/W2084812512","https://openalex.org/W2096742462","https://openalex.org/W2098737324","https://openalex.org/W2104597806","https://openalex.org/W2109121923","https://openalex.org/W2112683085","https://openalex.org/W2112876447","https://openalex.org/W2114968414","https://openalex.org/W2125055259","https://openalex.org/W2137507956","https://openalex.org/W2140679654","https://openalex.org/W2142261479","https://openalex.org/W2144372981","https://openalex.org/W2144660879","https://openalex.org/W2151023586","https://openalex.org/W2152880776","https://openalex.org/W2156935079","https://openalex.org/W2165742564","https://openalex.org/W2904339734","https://openalex.org/W2911964244","https://openalex.org/W2964075712","https://openalex.org/W2966207845","https://openalex.org/W2997149703","https://openalex.org/W3014245600","https://openalex.org/W3020348090","https://openalex.org/W3112138688","https://openalex.org/W4232383088","https://openalex.org/W6600350647","https://openalex.org/W6629247654","https://openalex.org/W6633023078","https://openalex.org/W6677091266","https://openalex.org/W6681729498"],"related_works":["https://openalex.org/W3032998312","https://openalex.org/W135177976","https://openalex.org/W4384486036","https://openalex.org/W1554561885","https://openalex.org/W3035097673","https://openalex.org/W4287800519","https://openalex.org/W2530675634","https://openalex.org/W3022333975","https://openalex.org/W4392918422","https://openalex.org/W4384786634"],"abstract_inverted_index":{"This":[0],"paper":[1],"addresses":[2],"the":[3,12,18,33,42,72,76,124,137,141,148,197,227],"repeated":[4,27,114],"acquisition":[5,208],"of":[6,35,44,75,89,139,151,167,182,209],"labels":[7,37,111,128,211],"for":[8,38,49,188,223],"data":[9,24,77,143,186,216],"items":[10],"when":[11,202],"labeling":[13,64,115,121,152,162,203],"is":[14,59,144,169,204,212,229],"imperfect.":[15],"We":[16,85],"examine":[17],"improvement":[19,34],"(or":[20],"lack":[21],"thereof)":[22],"in":[23,123,220],"quality":[25,102,190],"via":[26,51],"labeling,":[28,70],"and":[29,92,103,172],"focus":[30],"especially":[31],"on":[32],"training":[36],"supervised":[39],"induction.":[40],"With":[41,68],"outsourcing":[43],"small":[45],"tasks":[46],"becoming":[47],"easier,":[48],"example":[50],"Rent-A-Coder":[52],"or":[53],"Amazon's":[54],"Mechanical":[55],"Turk,":[56],"it":[57],"often":[58],"possible":[60],"to":[61,119,184],"obtain":[62],"less-than-expert":[63],"at":[65],"low":[66],"cost.":[67],"low-cost":[69],"preparing":[71],"unlabeled":[73,142],"part":[74],"can":[78,99,116,156],"become":[79],"considerably":[80],"more":[81],"expensive":[82],"than":[83],"labeling.":[84],"present":[86,174],"repeated-labeling":[87],"strategies":[88],"increasing":[90],"complexity,":[91],"show":[93,199],"several":[94],"main":[95],"results.":[96],"(i)":[97],"Repeated-labeling":[98],"improve":[100],"label":[101],"model":[104],"quality,":[105],"but":[106],"not":[107,130,145,205],"always.":[108],"(ii)":[109],"When":[110],"are":[112,129],"noisy,":[113],"be":[117,192],"preferable":[118],"single":[120],"even":[122,147],"traditional":[125],"setting":[126],"where":[127],"particularly":[131],"cheap.":[132],"(iii)":[133],"As":[134],"soon":[135],"as":[136],"cost":[138],"processing":[140],"free,":[146],"simple":[149],"strategy":[150,214],"everything":[153],"multiple":[154,210],"times":[155],"give":[157],"considerable":[158],"advantage.":[159],"(iv)":[160],"Repeatedly":[161],"a":[163,175,213],"carefully":[164],"chosen":[165],"set":[166],"points":[168,187],"generally":[170],"preferable,":[171],"we":[173],"robust":[176],"technique":[177],"that":[178,201,215],"combines":[179],"different":[180],"notions":[181],"uncertainty":[183],"select":[185],"which":[189],"should":[191,218],"improved.":[193],"The":[194],"bottom":[195],"line:":[196],"results":[198],"clearly":[200],"perfect,":[206],"selective":[207],"miners":[217],"have":[219],"their":[221],"repertoire;":[222],"certain":[224],"label-quality/cost":[225],"regimes,":[226],"benefit":[228],"substantial.":[230]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":33},{"year":2024,"cited_by_count":47},{"year":2023,"cited_by_count":49},{"year":2022,"cited_by_count":65},{"year":2021,"cited_by_count":76},{"year":2020,"cited_by_count":63},{"year":2019,"cited_by_count":78},{"year":2018,"cited_by_count":71},{"year":2017,"cited_by_count":72},{"year":2016,"cited_by_count":68},{"year":2015,"cited_by_count":93},{"year":2014,"cited_by_count":91},{"year":2013,"cited_by_count":74},{"year":2012,"cited_by_count":81}],"updated_date":"2026-05-10T08:33:47.465468","created_date":"2025-10-10T00:00:00"}
