{"id":"https://openalex.org/W3180104657","doi":"https://doi.org/10.1080/0952813x.2021.1907792","title":"An empirical evaluation of text representation schemes to filter the social media stream","display_name":"An empirical evaluation of text representation schemes to filter the social media stream","publication_year":2021,"publication_date":"2021-04-24","ids":{"openalex":"https://openalex.org/W3180104657","doi":"https://doi.org/10.1080/0952813x.2021.1907792","mag":"3180104657"},"language":"en","primary_location":{"id":"doi:10.1080/0952813x.2021.1907792","is_oa":false,"landing_page_url":"https://doi.org/10.1080/0952813x.2021.1907792","pdf_url":null,"source":{"id":"https://openalex.org/S153467142","display_name":"Journal of Experimental & Theoretical Artificial Intelligence","issn_l":"0952-813X","issn":["0952-813X","1362-3079"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Experimental &amp; Theoretical Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052190970","display_name":"Sandip Modha","orcid":"https://orcid.org/0000-0003-2427-2433"},"institutions":[{"id":"https://openalex.org/I4210125378","display_name":"Integrated Test Range","ror":"https://ror.org/02gvtkt16","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210125378","https://openalex.org/I4210150591"]},{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Sandip Modha","raw_affiliation_strings":["Information Retrieval and Language Processing Lab, DA-IICT Gandhinagar, India","Information Retrieval and Language Processing Lab, LDRP-ITR, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Information Retrieval and Language Processing Lab, DA-IICT Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]},{"raw_affiliation_string":"Information Retrieval and Language Processing Lab, LDRP-ITR, Gandhinagar, India","institution_ids":["https://openalex.org/I4210125378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026950398","display_name":"Prasenjit Majumder","orcid":"https://orcid.org/0000-0003-0840-9313"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Prasenjit Majumder","raw_affiliation_strings":["Information Retrieval and Language Processing Lab, DA-IICT Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Information Retrieval and Language Processing Lab, DA-IICT Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009564435","display_name":"Thomas Mandl","orcid":"https://orcid.org/0000-0002-8398-9699"},"institutions":[{"id":"https://openalex.org/I155765044","display_name":"University of Hildesheim","ror":"https://ror.org/02f9det96","country_code":"DE","type":"education","lineage":["https://openalex.org/I155765044"]},{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["DE","IN"],"is_corresponding":false,"raw_author_name":"Thomas Mandl","raw_affiliation_strings":["Information Retrieval and Language Processing Lab, DA-IICT Gandhinagar, India","Information Retrieval and Language Processing Lab, University of Hildesheim, Hildesheim, Germany"],"affiliations":[{"raw_affiliation_string":"Information Retrieval and Language Processing Lab, DA-IICT Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]},{"raw_affiliation_string":"Information Retrieval and Language Processing Lab, University of Hildesheim, Hildesheim, Germany","institution_ids":["https://openalex.org/I155765044"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5052190970"],"corresponding_institution_ids":["https://openalex.org/I4210125378","https://openalex.org/I98389781"],"apc_list":null,"apc_paid":null,"fwci":1.3597,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.84451926,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"34","issue":"3","first_page":"499","last_page":"525"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8846359252929688},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.6338109970092773},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.624140739440918},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.6021799445152283},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.43047231435775757},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34759530425071716},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3321155607700348},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.32134246826171875},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.25485873222351074},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13108140230178833},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.1156989336013794}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8846359252929688},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.6338109970092773},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.624140739440918},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.6021799445152283},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.43047231435775757},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34759530425071716},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3321155607700348},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32134246826171875},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25485873222351074},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13108140230178833},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.1156989336013794},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/0952813x.2021.1907792","is_oa":false,"landing_page_url":"https://doi.org/10.1080/0952813x.2021.1907792","pdf_url":null,"source":{"id":"https://openalex.org/S153467142","display_name":"Journal of Experimental & Theoretical Artificial Intelligence","issn_l":"0952-813X","issn":["0952-813X","1362-3079"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Experimental &amp; Theoretical Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.75}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W80056832","https://openalex.org/W1880789213","https://openalex.org/W1966982551","https://openalex.org/W1970310061","https://openalex.org/W2003298360","https://openalex.org/W2069870183","https://openalex.org/W2087609354","https://openalex.org/W2091273188","https://openalex.org/W2153579005","https://openalex.org/W2250539671","https://openalex.org/W2473555522","https://openalex.org/W2493916176","https://openalex.org/W2496721172","https://openalex.org/W2567289819","https://openalex.org/W2595653137","https://openalex.org/W2613977835","https://openalex.org/W2740168486","https://openalex.org/W2752172973","https://openalex.org/W2780932362","https://openalex.org/W2784010253","https://openalex.org/W2794853398","https://openalex.org/W2801267439","https://openalex.org/W2882319491","https://openalex.org/W2891177506","https://openalex.org/W2912102236","https://openalex.org/W2912123473","https://openalex.org/W2913474415","https://openalex.org/W2934222232","https://openalex.org/W2953553271","https://openalex.org/W2959053582","https://openalex.org/W2962932155","https://openalex.org/W2962993339","https://openalex.org/W2963297649","https://openalex.org/W3000571327","https://openalex.org/W3013027210","https://openalex.org/W3014075148","https://openalex.org/W3043469888","https://openalex.org/W3094099575","https://openalex.org/W3104209339","https://openalex.org/W6629028937","https://openalex.org/W6688201194","https://openalex.org/W6691892052","https://openalex.org/W7034732294"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2384888906","https://openalex.org/W2144190808","https://openalex.org/W2376314740","https://openalex.org/W2366644548","https://openalex.org/W2357241418","https://openalex.org/W2115485936","https://openalex.org/W3107474891","https://openalex.org/W2153015554"],"abstract_inverted_index":{"Modeling":[0],"text":[1,19,29,34,40,70,95,113],"in":[2],"a":[3,7,42,94,105],"numerical":[4],"representation":[5,30,71,114],"is":[6,102],"prime":[8],"task":[9,16],"for":[10,166],"any":[11],"Natural":[12],"Language":[13],"Processing":[14],"downstream":[15],"such":[17,37,132,213,266],"as":[18,38,104,133,214,267],"classification.":[20],"This":[21],"paper":[22],"attempts":[23],"to":[24,237],"study":[25],"the":[26,33,146,151,167,178,191,200,228,239,242,252,256,276],"effectiveness":[27],"of":[28,45,241],"schemes":[31,72],"on":[32,74,84,93,129,142,145,150,210,224,227,275],"classification":[35,96,169,243],"task,":[36],"aggressive":[39],"detection,":[41],"special":[43],"case":[44],"Hate":[46],"speech":[47],"from":[48],"social":[49],"media.":[50],"Aggression":[51],"levels":[52],"are":[53,91,164,173,235,248],"categorized":[54],"into":[55],"three":[56],"predefined":[57],"classes,":[58],"namely:":[59],"\u2018Non-aggressive\u2019":[60],"(NAG),":[61],"\u2018Overtly":[62],"Aggressive\u2019":[63,67],"(OAG),":[64],"and":[65,87,126,162,197,271],"\u2018Covertly":[66],"(CAG).":[68],"Various":[69],"based":[73,144,226],"BoW":[75,127,203],"techniques,":[76],"word":[77,80,124,137,180,184,208,219],"embedding,":[78,81],"contextual":[79],"sentence":[82,118],"embedding":[83,125,138,181,185,220],"traditional":[85,130,211,264],"classifiers,":[86,131,265],"deep":[88,147,229],"neural":[89,148,230,246],"models":[90,139,149,158,221,247],"compared":[92],"problem.":[97],"The":[98,109],"weighted":[99,193],"F1":[100],"score":[101],"used":[103],"primary":[106],"evaluation":[107],"metric.":[108],"results":[110,172],"show":[111],"that":[112],"using":[115,186],"Googles\u2019":[116],"universal":[117],"encoder":[119],"(USE)":[120],"performs":[121],"better":[122,141,206,223,262],"than":[123,195,207,263],"techniques":[128,204],"SVM,":[134,268],"while":[135],"pre-trained":[136,155,179,187,218],"perform":[140,205,222,260],"classifiers":[143,212,225,274],"English":[152],"dataset.":[153,258,279],"Recent":[154],"transfer":[156],"learning":[157],"like":[159],"Elmo,":[160],"ULMFi,":[161],"BERT":[163],"fine-tuned":[165],"aggression":[168],"task.":[170],"However,":[171],"not":[174],"at":[175],"par":[176],"with":[177],"model.":[182],"Overall,":[183],"fastText":[188],"vectors":[189],"produces":[190],"best":[192],"F1-score":[194],"Word2Vec":[196],"Glove.":[198],"On":[199],"Hindi":[201],"dataset,":[202],"embeddings":[209],"SVM.":[215],"In":[216],"contrast,":[217],"nets.":[231],"Statistical":[232],"significance":[233,240],"tests":[234],"employed":[236],"ensure":[238],"results.":[244],"Deep":[245],"more":[249],"robust":[250],"against":[251],"bias":[253],"induced":[254],"by":[255],"training":[257],"They":[259],"substantially":[261],"logistic":[269],"regression,":[270],"Naive":[272],"Bayes":[273],"Twitter":[277],"test":[278]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
