{"id":"https://openalex.org/W2993259699","doi":"https://doi.org/10.1109/kse.2019.8919368","title":"Effective Text Data Preprocessing Technique for Sentiment Analysis in Social Media Data","display_name":"Effective Text Data Preprocessing Technique for Sentiment Analysis in Social Media Data","publication_year":2019,"publication_date":"2019-10-01","ids":{"openalex":"https://openalex.org/W2993259699","doi":"https://doi.org/10.1109/kse.2019.8919368","mag":"2993259699"},"language":"en","primary_location":{"id":"doi:10.1109/kse.2019.8919368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/kse.2019.8919368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 11th International Conference on Knowledge and Systems Engineering (KSE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041959618","display_name":"Saurav Pradha","orcid":null},"institutions":[{"id":"https://openalex.org/I153230381","display_name":"Charles Sturt University","ror":"https://ror.org/00wfvh315","country_code":"AU","type":"education","lineage":["https://openalex.org/I153230381"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Saurav Pradha","raw_affiliation_strings":["Charles Sturt University,School of Computing and Mathematics,Melbourne,Victoria,Australia","School of Computing and Mathematics, Charles Sturt University, Melbourne, Victoria, Australia"],"affiliations":[{"raw_affiliation_string":"Charles Sturt University,School of Computing and Mathematics,Melbourne,Victoria,Australia","institution_ids":["https://openalex.org/I153230381"]},{"raw_affiliation_string":"School of Computing and Mathematics, Charles Sturt University, Melbourne, Victoria, Australia","institution_ids":["https://openalex.org/I153230381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053757316","display_name":"Malka N. Halgamuge","orcid":"https://orcid.org/0000-0001-9994-3778"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Malka N. Halgamuge","raw_affiliation_strings":["The University of Melbourne,Dep. of Electrical and Electronic Engineering,Victoria,Australia,3010","Dep. of Electrical and Electronic Engineering, The University of Melbourne, Victoria, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne,Dep. of Electrical and Electronic Engineering,Victoria,Australia,3010","institution_ids":["https://openalex.org/I165779595"]},{"raw_affiliation_string":"Dep. of Electrical and Electronic Engineering, The University of Melbourne, Victoria, Australia","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038716466","display_name":"Nguy\u1ec5n Tr\u1ea7n Qu\u1ed1c Vinh","orcid":"https://orcid.org/0000-0003-2281-0429"},"institutions":[{"id":"https://openalex.org/I3130697706","display_name":"Da Nang University of Technology","ror":"https://ror.org/001ydh096","country_code":"VN","type":"education","lineage":["https://openalex.org/I3130697706"]},{"id":"https://openalex.org/I3129492623","display_name":"University of Da Nang","ror":"https://ror.org/03ecpp171","country_code":"VN","type":"education","lineage":["https://openalex.org/I3129492623"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Nguyen Tran Quoc Vinh","raw_affiliation_strings":["The University of Da Nang - University of Science and Education,Faculty of Information Technology,Vietnam","Faculty of Information Technology, The University of Da Nang - University of Science and Education, Vietnam"],"affiliations":[{"raw_affiliation_string":"The University of Da Nang - University of Science and Education,Faculty of Information Technology,Vietnam","institution_ids":["https://openalex.org/I3130697706","https://openalex.org/I3129492623"]},{"raw_affiliation_string":"Faculty of Information Technology, The University of Da Nang - University of Science and Education, Vietnam","institution_ids":["https://openalex.org/I3130697706","https://openalex.org/I3129492623"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041959618"],"corresponding_institution_ids":["https://openalex.org/I153230381"],"apc_list":null,"apc_paid":null,"fwci":6.2141,"has_fulltext":false,"cited_by_count":135,"citation_normalized_percentile":{"value":0.97101416,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11344","display_name":"Traffic Prediction and Management Techniques","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14280","display_name":"Big Data Technologies and Applications","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8257026076316833},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.7344000935554504},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.7230647206306458},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.6404942274093628},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.6341559886932373},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6249507069587708},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6248069405555725},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.6231961250305176},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5612373352050781},{"id":"https://openalex.org/keywords/popularity","display_name":"Popularity","score":0.5030555129051208},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5015432834625244},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4693653881549835},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4165616035461426},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.41279250383377075},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.08646255731582642}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8257026076316833},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.7344000935554504},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.7230647206306458},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.6404942274093628},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.6341559886932373},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6249507069587708},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6248069405555725},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6231961250305176},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5612373352050781},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.5030555129051208},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5015432834625244},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4693653881549835},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4165616035461426},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.41279250383377075},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.08646255731582642},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/kse.2019.8919368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/kse.2019.8919368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 11th International Conference on Knowledge and Systems Engineering (KSE)","raw_type":"proceedings-article"},{"id":"pmh:oai:jupiter.its.unimelb.edu.au:11343/233313","is_oa":false,"landing_page_url":"http://hdl.handle.net/11343/233313","pdf_url":null,"source":{"id":"https://openalex.org/S4377196259","display_name":"Minerva Access (University of Melbourne)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I165779595","host_organization_name":"The University of Melbourne","host_organization_lineage":["https://openalex.org/I165779595"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE International Conference on Knowledge and Systems Engineering","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2023464902","https://openalex.org/W2590061102","https://openalex.org/W2616162435","https://openalex.org/W2766448074","https://openalex.org/W2780266336","https://openalex.org/W2811103702","https://openalex.org/W2902453863","https://openalex.org/W2910048657","https://openalex.org/W2934012051"],"related_works":["https://openalex.org/W3021501837","https://openalex.org/W2989490741","https://openalex.org/W3092506759","https://openalex.org/W2367545121","https://openalex.org/W4248881655","https://openalex.org/W2482165163","https://openalex.org/W3010890513","https://openalex.org/W120741642","https://openalex.org/W138569904","https://openalex.org/W2390914021"],"abstract_inverted_index":{"In":[0,107,238],"the":[1,24,72,95,117,134,143,146,153,163,203,206,225,229,234,243,248,255,274,291,298,302],"big":[2],"data":[3,5,22,48,63,118,175,219,283],"era,":[4],"is":[6],"made":[7],"in":[8,98,197,290],"real-time":[9],"or":[10,26],"closer":[11],"to":[12,30,54,70,85,132,141,151,233,254],"real-time.":[13],"Thus,":[14],"businesses":[15],"can":[16],"utilize":[17],"this":[18,57,108],"evergrowing":[19],"volume":[20],"of":[21,42,100,102,145,165,199,205,240,250,258,297],"for":[23],"data-driven":[25],"information-driven":[27],"decision-making":[28],"process":[29,86],"improve":[31],"their":[32],"businesses.":[33],"Social":[34],"media,":[35],"like":[36],"Twitter,":[37],"generates":[38],"an":[39,60,68,91,139],"enormous":[40],"amount":[41],"such":[43,125],"data.":[44,88,260,305],"However,":[45],"social":[46],"media":[47],"are":[49],"often":[50],"unstructured":[51,256,303],"and":[52,66,80,104,129,148,160,170,178,214,294],"difficult":[53],"manage.":[55],"Hence,":[56],"study":[58],"proposes":[59],"effective":[61],"text":[62,150,218,282],"preprocessing":[64,114,176,284],"technique":[65,194,285],"develop":[67,90,138],"algorithm":[69,92,140,177,207,230],"train":[71,184],"Support":[73],"Vector":[74],"Machine":[75],"(SVM),":[76],"Deep":[77],"Learning":[78],"(DL)":[79],"Na\u00efve":[81],"Bayes":[82],"(NB)":[83],"classifiers":[84],"Twitter":[87,121,158,259,304],"We":[89,155],"that":[93,192,224,280],"weights":[94],"sentiment":[96,179],"score":[97,181],"terms":[99,198,239],"weight":[101,142,180],"hashtag":[103,147],"cleaned":[105,149],"text.":[106,275],"study,":[109],"we":[110,161,183],"(i)":[111],"compare":[112],"different":[113],"techniques":[115,124],"on":[116,272],"collected":[119],"from":[120],"using":[122,301],"various":[123],"as":[126],"(stemming,":[127],"lemmatization":[128],"spelling":[130],"correction)":[131],"obtain":[133,152],"efficient":[135],"method":[136],"(ii)":[137],"scores":[144],"sentiment.":[154],"retrieved":[156],"N=1,314,000":[157],"data,":[159],"compared":[162],"popularity":[164],"two":[166],"products,":[167],"Google":[168],"Now":[169],"Amazon":[171],"Alexa.":[172],"Using":[173],"our":[174,277],"algorithm,":[182],"SVM,":[185],"DL,":[186],"NB":[187],"models.":[188],"The":[189,221],"results":[190],"show":[191],"stemming":[193],"performed":[195,245],"best":[196],"computational":[200,295],"speed.":[201],"Additionally,":[202],"accuracy":[204,249,293],"was":[208,231],"tested":[209],"against":[210],"manually":[211,235],"sorted":[212],"sentiments":[213,215],"produced":[216,227],"before":[217],"preprocessing.":[220],"result":[222],"demonstrated":[223],"impact":[226],"by":[228],"close":[232],"annotated":[236],"sentiments.":[237],"model":[241],"performance,":[242],"SVM":[244],"better":[246],"with":[247],"90.3%,":[251],"perhaps,":[252],"due":[253],"nature":[257],"Previous":[261],"studies":[262],"used":[263],"conventional":[264],"techniques;":[265],"hence,":[266],"no":[267],"precise":[268],"methods":[269],"were":[270],"utilized":[271],"cleaning":[273],"Therefore,":[276],"approach":[278],"confirms":[279],"proper":[281],"plays":[286],"a":[287],"significant":[288],"role":[289],"prediction":[292],"time":[296],"classifier":[299],"when":[300]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":32},{"year":2023,"cited_by_count":36},{"year":2022,"cited_by_count":27},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":5},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
