{"id":"https://openalex.org/W4289785279","doi":"https://doi.org/10.48550/arxiv.2208.01483","title":"Label Sleuth: From Unlabeled Text to a Classifier in a Few Hours","display_name":"Label Sleuth: From Unlabeled Text to a Classifier in a Few Hours","publication_year":2022,"publication_date":"2022-08-02","ids":{"openalex":"https://openalex.org/W4289785279","doi":"https://doi.org/10.48550/arxiv.2208.01483"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2208.01483","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.01483","pdf_url":"https://arxiv.org/pdf/2208.01483","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2208.01483","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032780546","display_name":"Eyal Shnarch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shnarch, Eyal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063967207","display_name":"Alon Halfon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Halfon, Alon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009715645","display_name":"Ariel Gera","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gera, Ariel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048281388","display_name":"Marina Danilevsky","orcid":"https://orcid.org/0000-0003-2875-2442"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Danilevsky, Marina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006509575","display_name":"Yannis Katsis","orcid":"https://orcid.org/0000-0002-1733-6227"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Katsis, Yannis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040286212","display_name":"Leshem Choshen","orcid":"https://orcid.org/0000-0002-0085-6496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choshen, Leshem","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066701171","display_name":"Mart\u00edn Santill\u00e1n Cooper","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cooper, Martin Santillan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026094612","display_name":"Dina Epelboim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Epelboim, Dina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100459168","display_name":"Zheng Zhang","orcid":"https://orcid.org/0000-0003-1470-6998"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062817658","display_name":"Dakuo Wang","orcid":"https://orcid.org/0000-0001-9371-9441"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Dakuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088507810","display_name":"Lucy Yip","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yip, Lucy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107931474","display_name":"Liat Ein\u2010Dor","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ein-Dor, Liat","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053483134","display_name":"Lena Dankin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dankin, Lena","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000738527","display_name":"Ilya Shnayderman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shnayderman, Ilya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035493244","display_name":"Ranit Aharonov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aharonov, Ranit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106404797","display_name":"Yunyao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yunyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037718770","display_name":"Naftali Liberman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liberman, Naftali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042964909","display_name":"Philip Levin Slesarev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Slesarev, Philip Levin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022407720","display_name":"Gwilym Newton","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Newton, Gwilym","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056377253","display_name":"Shila Ofek-Koifman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ofek-Koifman, Shila","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017159143","display_name":"Noam Slonim","orcid":"https://orcid.org/0000-0001-5171-8264"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Slonim, Noam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5087402916","display_name":"Yoav Katz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Katz, Yoav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":22,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8744000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8744000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.8690000176429749,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.8686000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.772844672203064},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.747681736946106},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.5527790188789368},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5212600827217102},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5087481141090393},{"id":"https://openalex.org/keywords/lift","display_name":"Lift (data mining)","score":0.48024967312812805},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.45889922976493835},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3299401104450226},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.15744373202323914},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11414104700088501}],"concepts":[{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.772844672203064},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.747681736946106},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.5527790188789368},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5212600827217102},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5087481141090393},{"id":"https://openalex.org/C139002025","wikidata":"https://www.wikidata.org/wiki/Q3001212","display_name":"Lift (data mining)","level":2,"score":0.48024967312812805},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.45889922976493835},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3299401104450226},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.15744373202323914},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11414104700088501},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2208.01483","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.01483","pdf_url":"https://arxiv.org/pdf/2208.01483","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2208.01483","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2208.01483","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2208.01483","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.01483","pdf_url":"https://arxiv.org/pdf/2208.01483","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.8199999928474426,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389397071","https://openalex.org/W2023045191","https://openalex.org/W2952839243","https://openalex.org/W2124842464","https://openalex.org/W2382657549","https://openalex.org/W4376877853","https://openalex.org/W75565505","https://openalex.org/W4214847518","https://openalex.org/W3009154991","https://openalex.org/W2945555514"],"abstract_inverted_index":{"Text":[0],"classification":[1],"can":[2],"be":[3],"useful":[4],"in":[5,96],"many":[6,35],"real-world":[7],"scenarios,":[8],"saving":[9],"a":[10,19,31,46,64,83,97,119],"lot":[11],"of":[12,121,130],"time":[13],"for":[14,34,51,61,104],"end":[15],"users.":[16,37],"However,":[17],"building":[18],"custom":[20,84],"classifier":[21,95],"typically":[22],"requires":[23],"coding":[24],"skills":[25],"and":[26,53,100,106,123],"ML":[27],"knowledge,":[28],"which":[29],"poses":[30],"significant":[32],"barrier":[33],"potential":[36],"To":[38],"lift":[39],"this":[40],"barrier,":[41],"we":[42,115],"introduce":[43],"Label":[44,113],"Sleuth,":[45],"free":[47],"open":[48,103,111],"source":[49],"system":[50,58],"labeling":[52,78],"creating":[54],"text":[55],"classifiers.":[56],"This":[57],"is":[59],"unique":[60],"(a)":[62],"being":[63,102],"no-code":[65],"system,":[66],"making":[67,86],"NLP":[68,131],"accessible":[69],"to":[70,94,117],"non-experts,":[71],"(b)":[72],"guiding":[73],"users":[74,122],"through":[75],"the":[76,87,128],"entire":[77],"process":[79,88],"until":[80],"they":[81],"obtain":[82],"classifier,":[85],"efficient":[89],"--":[90],"from":[91],"cold":[92],"start":[93],"few":[98],"hours,":[99],"(c)":[101],"configuration":[105],"extension":[107],"by":[108],"developers.":[109],"By":[110],"sourcing":[112],"Sleuth":[114],"hope":[116],"build":[118],"community":[120],"developers":[124],"that":[125],"will":[126],"broaden":[127],"utilization":[129],"models.":[132]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
