{"id":"https://openalex.org/W2068303084","doi":"https://doi.org/10.1145/2516889","title":"Improving Text Classification Accuracy by Training Label Cleaning","display_name":"Improving Text Classification Accuracy by Training Label Cleaning","publication_year":2013,"publication_date":"2013-11-01","ids":{"openalex":"https://openalex.org/W2068303084","doi":"https://doi.org/10.1145/2516889","mag":"2068303084"},"language":"en","primary_location":{"id":"doi:10.1145/2516889","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2516889","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082069715","display_name":"Andrea Esuli","orcid":"https://orcid.org/0000-0002-5725-4322"},"institutions":[{"id":"https://openalex.org/I4210144062","display_name":"Consorzio Roma Ricerche","ror":"https://ror.org/03jvpn714","country_code":"IT","type":"nonprofit","lineage":["https://openalex.org/I4210144062"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Andrea Esuli","raw_affiliation_strings":["Consiglio Nazionale delle Ricerche, Italy"],"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale delle Ricerche, Italy","institution_ids":["https://openalex.org/I4210144062"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063975186","display_name":"Fabrizio Sebastiani","orcid":"https://orcid.org/0000-0003-4221-6427"},"institutions":[{"id":"https://openalex.org/I4210144062","display_name":"Consorzio Roma Ricerche","ror":"https://ror.org/03jvpn714","country_code":"IT","type":"nonprofit","lineage":["https://openalex.org/I4210144062"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fabrizio Sebastiani","raw_affiliation_strings":["Consiglio Nazionale delle Ricerche, Italy"],"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale delle Ricerche, Italy","institution_ids":["https://openalex.org/I4210144062"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5082069715"],"corresponding_institution_ids":["https://openalex.org/I4210144062"],"apc_list":null,"apc_paid":null,"fwci":2.8876,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.91561112,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"31","issue":"4","first_page":"1","last_page":"28"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8389208912849426},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.6748191714286804},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6532917022705078},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6498889923095703},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6184539794921875},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.555302619934082},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.5304398536682129},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.48626601696014404},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.47053298354148865},{"id":"https://openalex.org/keywords/sort","display_name":"sort","score":0.4196249842643738},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.1918458342552185}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8389208912849426},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.6748191714286804},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6532917022705078},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6498889923095703},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6184539794921875},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.555302619934082},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.5304398536682129},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.48626601696014404},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.47053298354148865},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.4196249842643738},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.1918458342552185},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2516889","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2516889","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.385.5939","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.385.5939","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://nmis.isti.cnr.it/sebastiani/Publications/TOIS13.pdf","raw_type":"text"},{"id":"pmh:oai:dnet:people______::6f6f71b0db4f059f5df6ddb3d57a6f0e","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S7407055261","display_name":"ISTI Open Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM TRANSACTIONS ON INFORMATION SYSTEMS, vol. 31 (issue 4), pp. 19-28","raw_type":"Journal article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W123339444","https://openalex.org/W140312209","https://openalex.org/W1479807131","https://openalex.org/W1515346963","https://openalex.org/W1523389133","https://openalex.org/W1523892760","https://openalex.org/W1529196404","https://openalex.org/W1539210413","https://openalex.org/W1588207936","https://openalex.org/W1605688901","https://openalex.org/W1744114474","https://openalex.org/W1850527962","https://openalex.org/W1926592634","https://openalex.org/W1964488740","https://openalex.org/W1970381522","https://openalex.org/W1981276685","https://openalex.org/W1985554184","https://openalex.org/W1990334093","https://openalex.org/W2006873020","https://openalex.org/W2012725350","https://openalex.org/W2024046085","https://openalex.org/W2043765769","https://openalex.org/W2050672162","https://openalex.org/W2053463056","https://openalex.org/W2060216474","https://openalex.org/W2063198646","https://openalex.org/W2071664212","https://openalex.org/W2076118331","https://openalex.org/W2089421877","https://openalex.org/W2110327402","https://openalex.org/W2114535528","https://openalex.org/W2122254380","https://openalex.org/W2128097790","https://openalex.org/W2135892731","https://openalex.org/W2147891303","https://openalex.org/W2150102617","https://openalex.org/W2151023586","https://openalex.org/W2157023969","https://openalex.org/W2165062189","https://openalex.org/W2165886501","https://openalex.org/W2172162418","https://openalex.org/W2912150279","https://openalex.org/W2912934387","https://openalex.org/W2930957955","https://openalex.org/W3022479311","https://openalex.org/W4244952642","https://openalex.org/W4251560691"],"related_works":["https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W4231274751","https://openalex.org/W1529840045","https://openalex.org/W4244036394","https://openalex.org/W1842879116","https://openalex.org/W2135107501","https://openalex.org/W2047248895","https://openalex.org/W1822895636","https://openalex.org/W2551249631"],"abstract_inverted_index":{"In":[0],"text":[1],"classification":[2,126,166],"(TC)":[3],"and":[4,21,174],"other":[5],"tasks":[6],"involving":[7],"supervised":[8],"learning,":[9],"labelled":[10],"data":[11],"may":[12],"be":[13],"scarce":[14],"or":[15],"expensive":[16],"to":[17,61,105,112,175],"obtain.":[18],"Semisupervised":[19],"learning":[20,23,122],"active":[22],"are":[24],"two":[25],"strategies":[26,45],"whose":[27],"aim":[28],"is":[29,88],"maximizing":[30],"the":[31,34,78,90,102,107,118,163],"effectiveness":[32,167],"of":[33,41,84,120,147],"resulting":[35],"classifiers":[36],"for":[37,50,101,124,132,152],"a":[38,62,98],"given":[39],"amount":[40],"training":[42,66,80,108,149,178],"effort.":[43],"Both":[44],"have":[46,157],"been":[47,59],"actively":[48],"investigated":[49],"TC":[51,140],"in":[52,72,82,117,165],"recent":[53],"years.":[54],"Much":[55],"less":[56],"research":[57],"has":[58,93],"devoted":[60],"third":[63],"such":[64],"strategy,":[65],"label":[67,179],"cleaning":[68,180],"(TLC),":[69],"which":[70],"consists":[71],"devising":[73],"ranking":[74],"functions":[75],"that":[76,89,168],"sort":[77],"original":[79],"examples":[81],"terms":[83],"how":[85],"likely":[86],"it":[87],"human":[91,103],"annotator":[92,104],"mislabelled":[94,170],"them.":[95],"This":[96],"provides":[97],"convenient":[99],"means":[100],"revise":[106],"set":[109],"so":[110],"as":[111],"improve":[113],"its":[114],"quality.":[115],"Working":[116],"context":[119],"boosting-based":[121],"methods":[123],"multilabel":[125],"we":[127,156],"present":[128],"three":[129,137],"different":[130],"techniques":[131],"performing":[133],"TLC":[134],"and,":[135],"on":[136],"widely":[138],"used":[139],"benchmarks,":[141],"evaluate":[142,162],"them":[143],"by":[144],"their":[145],"capability":[146],"spotting":[148],"documents":[150],"that,":[151],"experimental":[153],"reasons":[154],"only,":[155],"purposefully":[158],"mislabelled.":[159],"We":[160],"also":[161],"degradation":[164],"these":[169],"texts":[171],"bring":[172],"about,":[173],"what":[176],"extent":[177],"can":[181],"prevent":[182],"this":[183],"degradation.":[184]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":8},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":1}],"updated_date":"2026-03-02T08:37:19.008085","created_date":"2025-10-10T00:00:00"}
