{"id":"https://openalex.org/W2983820245","doi":"https://doi.org/10.26615/978-954-452-056-4_022","title":"Sparse Victory \u2013 A Large Scale Systematic Comparison of Count-Based and Prediction-Based Vectorizers for Text Classification","display_name":"Sparse Victory \u2013 A Large Scale Systematic Comparison of Count-Based and Prediction-Based Vectorizers for Text Classification","publication_year":2019,"publication_date":"2019-10-22","ids":{"openalex":"https://openalex.org/W2983820245","doi":"https://doi.org/10.26615/978-954-452-056-4_022","mag":"2983820245"},"language":"en","primary_location":{"id":"doi:10.26615/978-954-452-056-4_022","is_oa":true,"landing_page_url":"http://doi.org/10.26615/978-954-452-056-4_022","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_022","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.26615/978-954-452-056-4_022","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102777912","display_name":"Rupak Chakraborty","orcid":"https://orcid.org/0000-0002-2900-5863"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rupak Chakraborty","raw_affiliation_strings":["Adobe Inc, India"],"affiliations":[{"raw_affiliation_string":"Adobe Inc, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021935761","display_name":"Ashima Elhence","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ashima Elhence","raw_affiliation_strings":["Adobe Inc, India"],"affiliations":[{"raw_affiliation_string":"Adobe Inc, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057513936","display_name":"Kapil Arora","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kapil Arora","raw_affiliation_strings":["Adobe Inc, India"],"affiliations":[{"raw_affiliation_string":"Adobe Inc, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102777912"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.42,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71785863,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"188","last_page":"197"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.7476162910461426},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7400701642036438},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6053988337516785},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5062019228935242},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.45283883810043335},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4525444507598877},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.44042670726776123},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33657336235046387},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.31589341163635254},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.31326669454574585},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13062119483947754}],"concepts":[{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.7476162910461426},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7400701642036438},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6053988337516785},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5062019228935242},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.45283883810043335},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4525444507598877},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.44042670726776123},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33657336235046387},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31589341163635254},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.31326669454574585},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13062119483947754},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.26615/978-954-452-056-4_022","is_oa":true,"landing_page_url":"http://doi.org/10.26615/978-954-452-056-4_022","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_022","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.26615/978-954-452-056-4_022","is_oa":true,"landing_page_url":"http://doi.org/10.26615/978-954-452-056-4_022","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_022","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2983820245.pdf","grobid_xml":"https://content.openalex.org/works/W2983820245.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1880262756","https://openalex.org/W2070996757","https://openalex.org/W2091273188","https://openalex.org/W2111537739","https://openalex.org/W2140679639","https://openalex.org/W2144211451","https://openalex.org/W2152311353","https://openalex.org/W2153579005","https://openalex.org/W2158899491","https://openalex.org/W2250539671","https://openalex.org/W2251803266","https://openalex.org/W2493916176","https://openalex.org/W2516809705","https://openalex.org/W2612953412","https://openalex.org/W2740711318","https://openalex.org/W2787560479","https://openalex.org/W2794557536","https://openalex.org/W2798812533","https://openalex.org/W2799054028","https://openalex.org/W2880875857","https://openalex.org/W2882319491","https://openalex.org/W2890466810","https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2952230511","https://openalex.org/W2962739339","https://openalex.org/W2962828264","https://openalex.org/W2963026768","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963756346","https://openalex.org/W2963918774","https://openalex.org/W4231510805","https://openalex.org/W4294170691","https://openalex.org/W4302343710","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2980729574","https://openalex.org/W1560851690","https://openalex.org/W3092047717","https://openalex.org/W4390881630","https://openalex.org/W2770162183","https://openalex.org/W3110772647","https://openalex.org/W2947721150","https://openalex.org/W2894231409","https://openalex.org/W3127365535","https://openalex.org/W2050958351"],"abstract_inverted_index":{"In":[0],"this":[1,125],"paper":[2],"we":[3],"study":[4],"the":[5,34,38,61,83,86,95,99,109,130],"performance":[6,62,126],"of":[7,16,37,60,63,82,85,88,108,116,120,133],"several":[8],"text":[9],"vectorization":[10],"algorithms":[11],"on":[12,106],"a":[13],"diverse":[14],"collection":[15],"73":[17,110],"publicly":[18],"available":[19],"datasets.":[20],"Traditional":[21],"sparse":[22,96],"vectorizers":[23,65,97],"like":[24,43,50,69],"Tf-Idf":[25],"and":[26,47,77,102,124],"Feature":[27],"Hashing":[28],"have":[29,54],"been":[30],"systematically":[31],"compared":[32],"with":[33],"latest":[35],"state":[36],"art":[39],"neural":[40,100],"word":[41,101],"embeddings":[42,49],"Word2Vec,":[44],"GloVe,":[45],"FastText":[46],"character":[48,103],"ELMo,":[51],"Flair.":[52],"We":[53],"carried":[55],"out":[56],"an":[57,113],"extensive":[58],"analysis":[59],"these":[64],"across":[66,129],"different":[67,131],"dimensions":[68,132],"classification":[70],"metrics":[71],"(.i.e.":[72],"precision,":[73],"recall,":[74],"accuracy),":[75],"dataset-size,":[76],"imbalanced":[78],"data":[79],"(in":[80,118],"terms":[81,119],"distribution":[84],"number":[87],"class":[89],"labels).":[90],"Our":[91],"experiments":[92],"reveal":[93],"that":[94],"beat":[98],"embedding":[104],"models":[105],"61":[107],"datasets":[111],"by":[112],"average":[114],"margin":[115],"3-5%":[117],"macro":[121],"f1":[122],"score)":[123],"is":[127],"consistent":[128],"comparison.":[134]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2026-01-13T01:12:25.745995","created_date":"2019-11-22T00:00:00"}
