{"id":"https://openalex.org/W4386883724","doi":"https://doi.org/10.1109/iciis58898.2023.10253582","title":"Empirical Analysis for the Selection of Baseline Performances for Short Text Classification","display_name":"Empirical Analysis for the Selection of Baseline Performances for Short Text Classification","publication_year":2023,"publication_date":"2023-08-25","ids":{"openalex":"https://openalex.org/W4386883724","doi":"https://doi.org/10.1109/iciis58898.2023.10253582"},"language":"en","primary_location":{"id":"doi:10.1109/iciis58898.2023.10253582","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iciis58898.2023.10253582","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 17th International Conference on Industrial and Information Systems (ICIIS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032043837","display_name":"JRKC Jayakody","orcid":null},"institutions":[{"id":"https://openalex.org/I175606534","display_name":"Wayamba University of Sri Lanka","ror":"https://ror.org/043yykt67","country_code":"LK","type":"education","lineage":["https://openalex.org/I175606534"]}],"countries":["LK"],"is_corresponding":false,"raw_author_name":"JRKC Jayakody","raw_affiliation_strings":["University of Wayamba,Department of Computing and Information System,Sri Lanka","Department of Computing and Information System, University of Wayamba, Sri Lanka"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Wayamba,Department of Computing and Information System,Sri Lanka","institution_ids":["https://openalex.org/I175606534"]},{"raw_affiliation_string":"Department of Computing and Information System, University of Wayamba, Sri Lanka","institution_ids":["https://openalex.org/I175606534"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026468134","display_name":"V.G. Tharinda Nishantha Vidanagama","orcid":null},"institutions":[{"id":"https://openalex.org/I175606534","display_name":"Wayamba University of Sri Lanka","ror":"https://ror.org/043yykt67","country_code":"LK","type":"education","lineage":["https://openalex.org/I175606534"]}],"countries":["LK"],"is_corresponding":false,"raw_author_name":"VGTN Vidanagama","raw_affiliation_strings":["University of Wayamba,Department of Computing and Information System,Sri Lanka","Department of Computing and Information System, University of Wayamba, Sri Lanka"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Wayamba,Department of Computing and Information System,Sri Lanka","institution_ids":["https://openalex.org/I175606534"]},{"raw_affiliation_string":"Department of Computing and Information System, University of Wayamba, Sri Lanka","institution_ids":["https://openalex.org/I175606534"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008036750","display_name":"Indika Perera","orcid":"https://orcid.org/0000-0001-5660-248X"},"institutions":[{"id":"https://openalex.org/I195740183","display_name":"University of Moratuwa","ror":"https://ror.org/0491f5305","country_code":"LK","type":"education","lineage":["https://openalex.org/I195740183"]}],"countries":["LK"],"is_corresponding":false,"raw_author_name":"Indika Perera","raw_affiliation_strings":["University of Moratuwa,Department of Computer Science and Engineering,Sri Lanka","Department of Computer Science and Engineering, University of Moratuwa, Sri Lanka"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Moratuwa,Department of Computer Science and Engineering,Sri Lanka","institution_ids":["https://openalex.org/I195740183"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Moratuwa, Sri Lanka","institution_ids":["https://openalex.org/I195740183"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108829230","display_name":"H. M. L. K. Herath","orcid":null},"institutions":[{"id":"https://openalex.org/I175606534","display_name":"Wayamba University of Sri Lanka","ror":"https://ror.org/043yykt67","country_code":"LK","type":"education","lineage":["https://openalex.org/I175606534"]}],"countries":["LK"],"is_corresponding":false,"raw_author_name":"HMLK Herath","raw_affiliation_strings":["University of Wayamba,Department of Agribusiness Management,Sri Lanka","Department of Agribusiness Management, University of Wayamba, Sri Lanka"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Wayamba,Department of Agribusiness Management,Sri Lanka","institution_ids":["https://openalex.org/I175606534"]},{"raw_affiliation_string":"Department of Agribusiness Management, University of Wayamba, Sri Lanka","institution_ids":["https://openalex.org/I175606534"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1171313,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"350","issue":null,"first_page":"335","last_page":"340"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.771162748336792},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7187867760658264},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.6743988394737244},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.573764443397522},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.573573648929596},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5714412927627563},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5679478645324707},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5328519940376282},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5304690599441528},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5266363620758057},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5265015959739685},{"id":"https://openalex.org/keywords/statistical-classification","display_name":"Statistical classification","score":0.428760290145874},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4260767102241516},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.4250809848308563},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3900049924850464},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.1162436306476593},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09823939204216003}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.771162748336792},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7187867760658264},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.6743988394737244},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.573764443397522},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.573573648929596},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5714412927627563},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5679478645324707},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5328519940376282},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5304690599441528},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5266363620758057},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5265015959739685},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.428760290145874},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4260767102241516},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.4250809848308563},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3900049924850464},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.1162436306476593},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09823939204216003},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iciis58898.2023.10253582","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iciis58898.2023.10253582","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 17th International Conference on Industrial and Information Systems (ICIIS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6100000143051147,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1551219252","https://openalex.org/W1960685374","https://openalex.org/W2014545475","https://openalex.org/W2049040426","https://openalex.org/W2071106922","https://openalex.org/W2076854725","https://openalex.org/W2137082019","https://openalex.org/W2156741031","https://openalex.org/W2250539671","https://openalex.org/W2338833146","https://openalex.org/W2562617836","https://openalex.org/W2590061102","https://openalex.org/W2805951488","https://openalex.org/W2889329323","https://openalex.org/W2896457183","https://openalex.org/W2899559665","https://openalex.org/W2913338690","https://openalex.org/W2947323695","https://openalex.org/W2954938346","https://openalex.org/W2964022793","https://openalex.org/W2965349765","https://openalex.org/W3015597294","https://openalex.org/W3016206532","https://openalex.org/W3021053579","https://openalex.org/W3184324824","https://openalex.org/W4205294488","https://openalex.org/W4205443225","https://openalex.org/W4292779060","https://openalex.org/W6755207826","https://openalex.org/W6778883912","https://openalex.org/W6805096327"],"related_works":["https://openalex.org/W2372663604","https://openalex.org/W2144982634","https://openalex.org/W2345274719","https://openalex.org/W2962783287","https://openalex.org/W2197164549","https://openalex.org/W2178383460","https://openalex.org/W2755164039","https://openalex.org/W2941340404","https://openalex.org/W4200377927","https://openalex.org/W2809278097"],"abstract_inverted_index":{"Improvements":[0],"of":[1,15,162,197,212,229,239],"classification":[2,33,47,170,219,249],"performance":[3,48,67],"for":[4,105],"short":[5,45,108,127],"text":[6,46,109,128],"have":[7],"become":[8],"increasingly":[9],"popular":[10],"due":[11],"to":[12,30,44,64,75,89,119,192,206,216,246],"explosive":[13],"growth":[14],"social":[16],"media":[17],"and":[18,98,149],"other":[19,193],"online":[20],"communication":[21],"platforms.":[22],"As":[23],"a":[24,36,117],"result,":[25],"recent":[26],"research":[27,41,84,241,253],"works":[28,42],"attempt":[29],"improve":[31,217],"the":[32,66,80,91,99,106,160,169,218],"performances":[34,171,190,220,250],"over":[35],"selected":[37],"baseline.":[38],"Unfortunately,":[39],"latest":[40,121],"related":[43],"improvements":[49,68],"with":[50,69,79,152,184,251],"Neural":[51],"networks":[52],"[CNN,":[53],"LSTM,RNN]":[54],"and,":[55],"Embeddings":[56],"as":[57,116,145,157,159],"feature":[58,94,141,199,222],"representation":[59,142,223],"haven't":[60],"use":[61],"common":[62],"baseline":[63,118,248],"compare":[65,76,120],"their":[70],"experiments.":[71],"Therefore,":[72],"it's":[73],"hard":[74],"one":[77],"work":[78,85,242],"other.":[81],"Hence":[82],"this":[83],"was":[86],"carried":[87],"out":[88,196,228],"identify":[90],"appropriate":[92],"n-grams,":[93],"representation,":[95],"preprocessing":[96],"technique":[97],"machine":[100,175,231],"learning":[101,176,232],"algorithm":[102],"which":[103,112,131],"suits":[104],"given":[107],"type":[110,224],"dataset":[111],"can":[113,243],"be":[114,244],"used":[115,125,245],"experimented":[122],"results.":[123,237],"We":[124,135],"seven":[126],"types":[129],"datasets":[130],"are":[132],"publicly":[133],"available.":[134],"compared":[136,168,191,205],"different":[137,173],"n-grams":[138],"[1-gram,2-gram,3-gram":[139],"etc],":[140],"techniques":[143,156,201],"such":[144],"[TF]":[146],"term":[147],"frequency":[148,151,155],"[TF-IDF]term":[150],"inverse":[153],"document":[154],"well":[158,204],"impact":[161],"using":[163],"stop":[164,213],"words.":[165],"Moreover,":[166,227],"we":[167],"among":[172],"traditional":[174,230],"based":[177],"algorithms.":[178],"Our":[179],"conclusions":[180],"are,":[181],"combining":[182],"1-gram":[183],"2-gram":[185],"word":[186,214],"features":[187],"gave":[188,235],"high":[189],"n-grams.":[194],"[2]":[195],"two":[198],"representations":[200],"TF":[202,207],"perform":[203],"IDF.":[208],"[3]":[209],"further,":[210],"removal":[211],"support":[215],"when":[221],"is":[225],"TF.[4]":[226],"algorithms,":[233],"SVM":[234],"good":[236],"Finding":[238],"our":[240],"compute":[247],"future":[252],"work.":[254]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
