{"id":"https://openalex.org/W3164313670","doi":"https://doi.org/10.1186/s40537-021-00462-6","title":"Investigating rarity in web attacks with ensemble learners","display_name":"Investigating rarity in web attacks with ensemble learners","publication_year":2021,"publication_date":"2021-05-20","ids":{"openalex":"https://openalex.org/W3164313670","doi":"https://doi.org/10.1186/s40537-021-00462-6","mag":"3164313670"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-021-00462-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-021-00462-6","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-021-00462-6","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-021-00462-6","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001819774","display_name":"Richard Zuech","orcid":"https://orcid.org/0000-0002-5526-1094"},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Richard Zuech","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, FL, USA"],"raw_orcid":"https://orcid.org/0000-0002-5526-1094","affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047489766","display_name":"John Hancock","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Hancock","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, FL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089170562","display_name":"Taghi M. Khoshgoftaar","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taghi M. Khoshgoftaar","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, FL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5001819774"],"corresponding_institution_ids":["https://openalex.org/I63772739"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":1.1193,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82026722,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"8","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9732000231742859,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/undersampling","display_name":"Undersampling","score":0.9716378450393677},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.8500461578369141},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.777884840965271},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6641474962234497},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6513756513595581},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6300714015960693},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.5796831846237183},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.550378680229187},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5304889678955078},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.479961633682251},{"id":"https://openalex.org/keywords/receiver-operating-characteristic","display_name":"Receiver operating characteristic","score":0.45758357644081116},{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.4128098487854004},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.28780436515808105}],"concepts":[{"id":"https://openalex.org/C136536468","wikidata":"https://www.wikidata.org/wiki/Q1225894","display_name":"Undersampling","level":2,"score":0.9716378450393677},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.8500461578369141},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.777884840965271},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6641474962234497},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6513756513595581},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6300714015960693},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.5796831846237183},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.550378680229187},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5304889678955078},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.479961633682251},{"id":"https://openalex.org/C58471807","wikidata":"https://www.wikidata.org/wiki/Q327120","display_name":"Receiver operating characteristic","level":2,"score":0.45758357644081116},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.4128098487854004},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.28780436515808105},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s40537-021-00462-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-021-00462-6","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-021-00462-6","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:dfdb41dd32034055984664ba590be03e","is_oa":true,"landing_page_url":"https://doaj.org/article/dfdb41dd32034055984664ba590be03e","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 8, Iss 1, Pp 1-27 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s40537-021-00462-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-021-00462-6","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-021-00462-6","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6200000047683716,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G5416735113","display_name":"MRI: Acquisition of Big Data Training and Research Laboratory","funder_award_id":"1427536","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6293531692","display_name":null,"funder_award_id":"CNS-1427536","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310801","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387"},{"id":"https://openalex.org/F4320317380","display_name":"Universidad del Atl\u00e1ntico","ror":"https://ror.org/05mm1w714"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3164313670.pdf","grobid_xml":"https://content.openalex.org/works/W3164313670.grobid-xml"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W23242426","https://openalex.org/W98199129","https://openalex.org/W1680392829","https://openalex.org/W1981552604","https://openalex.org/W2015553421","https://openalex.org/W2016441490","https://openalex.org/W2039427951","https://openalex.org/W2080562691","https://openalex.org/W2088535455","https://openalex.org/W2088794999","https://openalex.org/W2092126505","https://openalex.org/W2122210511","https://openalex.org/W2147770670","https://openalex.org/W2155653793","https://openalex.org/W2157995113","https://openalex.org/W2158262405","https://openalex.org/W2279833412","https://openalex.org/W2295598076","https://openalex.org/W2312301617","https://openalex.org/W2318802957","https://openalex.org/W2789828921","https://openalex.org/W2797532987","https://openalex.org/W2891398562","https://openalex.org/W2899434936","https://openalex.org/W2909961873","https://openalex.org/W2911964244","https://openalex.org/W2912934387","https://openalex.org/W2919984788","https://openalex.org/W2965604686","https://openalex.org/W2990580840","https://openalex.org/W2997301451","https://openalex.org/W2998019206","https://openalex.org/W3002389070","https://openalex.org/W3008131571","https://openalex.org/W3013997935","https://openalex.org/W3014781180","https://openalex.org/W3021219025","https://openalex.org/W3024905798","https://openalex.org/W3035510612","https://openalex.org/W3081279800","https://openalex.org/W3102476541","https://openalex.org/W3104887532","https://openalex.org/W3108630703","https://openalex.org/W3123817474","https://openalex.org/W4230452299","https://openalex.org/W4285719527","https://openalex.org/W6604039723","https://openalex.org/W6608287133"],"related_works":["https://openalex.org/W3165582150","https://openalex.org/W4293261997","https://openalex.org/W4367336074","https://openalex.org/W3154045278","https://openalex.org/W4379620016","https://openalex.org/W4393666307","https://openalex.org/W3210764983","https://openalex.org/W4393443811","https://openalex.org/W4367335949","https://openalex.org/W3089416646"],"abstract_inverted_index":{"Abstract":[0],"Class":[1],"rarity":[2],"is":[3,204,224],"a":[4,18,190],"frequent":[5],"challenge":[6],"in":[7,54,189],"cybersecurity.":[8],"Rarity":[9],"occurs":[10],"when":[11,221],"the":[12,35,42,58,88,123,133,158,168,186,197,207],"positive":[13,43],"(attack)":[14],"class":[15,235],"only":[16],"has":[17],"small":[19],"number":[20],"of":[21,102,233],"instances":[22],"for":[23,34,91,132,166],"machine":[24],"learning":[25],"classifiers":[26,36],"to":[27,37,226],"train":[28],"upon,":[29],"thus":[30],"making":[31],"it":[32],"difficult":[33],"discriminate":[38],"and":[39,65,77,115,153,165,212,219,229,237],"learn":[40],"from":[41,57],"class.":[44],"To":[45],"investigate":[46],"rarity,":[47],"we":[48,79],"examine":[49],"three":[50,69,93,170],"individual":[51,70,94],"web":[52,71,95],"attacks":[53,72],"big":[55],"data":[56],"CSE-CIC-IDS2018":[59],"dataset:":[60],"\u201cBrute":[61,63],"Force-Web\u201d,":[62],"Force-XSS\u201d,":[64],"\u201cSQL":[66],"Injection\u201d.":[67],"These":[68],"are":[73,105,130],"also":[74],"severely":[75],"imbalanced,":[76],"so":[78],"evaluate":[80],"whether":[81],"random":[82,178],"undersampling":[83,179,203],"(RUS)":[84],"treatments":[85],"can":[86],"improve":[87,181],"classification":[89,119,183],"performance":[90,184,215],"these":[92],"attacks.":[96],"The":[97],"following":[98,134],"eight":[99],"different":[100,136],"levels":[101],"RUS":[103],"ratios":[104],"evaluated:":[106],"no":[107,222],"sampling,":[108],"999:1,":[109],"99:1,":[110],"95:5,":[111],"9:1,":[112],"3:1,":[113],"65:35,":[114],"1:1.":[116],"For":[117],"measuring":[118],"performance,":[120],"Area":[121],"Under":[122],"Receiver":[124],"Operating":[125],"Characteristic":[126],"Curve":[127],"(AUC)":[128],"metrics":[129],"obtained":[131],"seven":[135],"classifiers:":[137],"Random":[138],"Forest":[139],"(RF),":[140],"CatBoost":[141],"(CB),":[142],"LightGBM":[143],"(LGB),":[144],"XGBoost":[145],"(XGB),":[146],"Decision":[147],"Tree":[148],"(DT),":[149],"Naive":[150],"Bayes":[151],"(NB),":[152],"Logistic":[154],"Regression":[155],"(LR)":[156],"(with":[157],"first":[159],"four":[160],"learners":[161,164,195,209],"being":[162,171],"ensemble":[163,208],"comparison,":[167],"last":[169],"single":[172],"learners).":[173],"We":[174],"find":[175],"that":[176],"applying":[177],"does":[180],"overall":[182],"with":[185],"AUC":[187,199],"metric":[188],"statistically":[191],"significant":[192],"manner.":[193],"Ensemble":[194],"achieve":[196],"top":[198],"scores":[200],"after":[201],"massive":[202],"applied,":[205],"but":[206],"break":[210],"down":[211],"have":[213],"poor":[214],"(worse":[216],"than":[217],"NB":[218],"DT)":[220],"sampling":[223],"applied":[225],"our":[227],"unique":[228],"harsh":[230],"experimental":[231],"conditions":[232],"severe":[234],"imbalance":[236],"rarity.":[238]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
