{"id":"https://openalex.org/W4402813303","doi":"https://doi.org/10.1109/csr61664.2024.10679498","title":"New Approach to Shorten Feature Set via TF-IDF for Machine Learning-Based Webshell Detection","display_name":"New Approach to Shorten Feature Set via TF-IDF for Machine Learning-Based Webshell Detection","publication_year":2024,"publication_date":"2024-09-02","ids":{"openalex":"https://openalex.org/W4402813303","doi":"https://doi.org/10.1109/csr61664.2024.10679498"},"language":"en","primary_location":{"id":"doi:10.1109/csr61664.2024.10679498","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csr61664.2024.10679498","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Cyber Security and Resilience (CSR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002153335","display_name":"Viet Anh Phan","orcid":"https://orcid.org/0009-0003-1787-8063"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Viet Anh Phan","raw_affiliation_strings":["Brno University of Technology,Department of Telecommunications,Brno,Czech Republic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Brno University of Technology,Department of Telecommunications,Brno,Czech Republic","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014988055","display_name":"Jan Je\u0159\u00e1bek","orcid":"https://orcid.org/0000-0001-9487-5024"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan Jerabek","raw_affiliation_strings":["Brno University of Technology,Department of Telecommunications,Brno,Czech Republic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Brno University of Technology,Department of Telecommunications,Brno,Czech Republic","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113400176","display_name":"Dinh Khoa Bach Le","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dinh Khanh Le","raw_affiliation_strings":["Center 286, Cyber Command,Ho Chi Minh City,Vietnam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center 286, Cyber Command,Ho Chi Minh City,Vietnam","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067751360","display_name":"Tom\u00e1\u0161 G\u00f6tthans","orcid":"https://orcid.org/0000-0002-0386-1813"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Tomas Gotthans","raw_affiliation_strings":["Brno University of Technology,Department of Radio Electronics,Brno,Czech Republic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Brno University of Technology,Department of Radio Electronics,Brno,Czech Republic","institution_ids":["https://openalex.org/I60587646"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.0686,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.90059032,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"50","last_page":"55"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9444000124931335,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9444000124931335,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7385547161102295},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6252367496490479},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6080561876296997},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5075968503952026},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4968443214893341},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.44780880212783813},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40344107151031494}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7385547161102295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6252367496490479},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6080561876296997},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5075968503952026},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4968443214893341},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.44780880212783813},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40344107151031494},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/csr61664.2024.10679498","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csr61664.2024.10679498","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Cyber Security and Resilience (CSR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1965667542","https://openalex.org/W1988195734","https://openalex.org/W2119821739","https://openalex.org/W2295598076","https://openalex.org/W2301541953","https://openalex.org/W2792256830","https://openalex.org/W2917451471","https://openalex.org/W3084240427","https://openalex.org/W3154485088","https://openalex.org/W3173944302","https://openalex.org/W3175269629","https://openalex.org/W4205802229","https://openalex.org/W4285495947","https://openalex.org/W4306760315","https://openalex.org/W4307991954","https://openalex.org/W4384563052","https://openalex.org/W4394814093","https://openalex.org/W6693692819"],"related_works":["https://openalex.org/W2382433580","https://openalex.org/W2100326285","https://openalex.org/W2369751049","https://openalex.org/W2198237484","https://openalex.org/W2041122820","https://openalex.org/W2381981226","https://openalex.org/W2383777945","https://openalex.org/W2030848013","https://openalex.org/W4386159726","https://openalex.org/W4388321867"],"abstract_inverted_index":{"The":[0,226],"existence":[1],"of":[2,13,25,65,83,113,123,151,201,222,228,234,242,251],"malicious":[3,26,137],"webshells":[4],"poses":[5],"a":[6,48,124,248],"significant":[7],"threat":[8],"to":[9,46,61,101,147,196],"the":[10,66,69,81,111,121,199,208,219,232,235,239],"security":[11],"infrastructure":[12],"computer":[14],"systems,":[15],"smart":[16],"devices,":[17],"and":[18,35,120,128,136,181,191,247],"applications.":[19],"In":[20],"our":[21],"work,":[22],"prevalent":[23],"forms":[24],"webshell":[27,107,149],"scripts,":[28],"such":[29],"as":[30],"PHP,":[31],"ASP,":[32,155],"ASPX,":[33,156],"JSP":[34],"Powershell":[36],"have":[37,43],"been":[38,44],"identified.":[39],"Machine":[40,179],"learning":[41,85,165,224],"techniques":[42],"proved":[45],"be":[47],"valuable":[49],"tool":[50],"for":[51],"detecting":[52],"webshells.":[53],"Feature":[54],"reduction":[55,71,95],"has":[56],"played":[57],"an":[58,90],"important":[59],"role":[60],"overcome":[62],"excessive":[63],"features":[64,130],"dataset":[67],"in":[68,93,106],"feature":[70,94],"phase,":[72],"which":[73],"helps":[74],"reducing":[75],"computational":[76],"costs":[77],"while":[78],"still":[79],"keeping":[80],"generalization":[82],"machine":[84,164,223],"model.":[86],"This":[87],"study":[88],"introduces":[89],"innovative":[91],"approach":[92,210],"research":[96,230],"by":[97],"leveraging":[98],"regular":[99],"expressions":[100],"filter":[102],"functions":[103],"or":[104],"words":[105],"files.":[108],"Subsequently,":[109],"through":[110],"calculation":[112],"Term":[114],"Frequency-Inverse":[115],"Document":[116],"Frequency":[117],"(TF-IDF)":[118],"values":[119],"establishment":[122],"cut-off":[125,249],"point,":[126],"common":[127],"rare":[129],"lacking":[131],"distinguishing":[132],"value":[133],"between":[134],"benign":[135],"activities":[138],"are":[139,194],"eliminated.":[140],"Then,":[141],"this":[142,229],"work":[143],"extends":[144],"its":[145],"scope":[146],"perform":[148],"detection":[150],"five":[152,162],"types":[153],"(PHP,":[154],"JSP,":[157],"Powershell).":[158],"Besides,":[159],"we":[160],"utilize":[161],"distinct":[163],"models:":[166],"Random":[167],"Forest":[168],"(RF),":[169],"Extreme":[170],"Gradient":[171],"Boosting":[172],"(XGB),":[173],"Multi-layer":[174],"Perceptron":[175],"(MLP),":[176],"Support":[177],"Vector":[178],"(SVM),":[180],"Convolutional":[182],"Neural":[183],"Network":[184],"(CNN).":[185],"Computational":[186],"metrics":[187],"including":[188],"Accuracy,":[189],"F1-score":[190],"Training":[192],"time":[193,215],"examined":[195],"comprehensively":[197],"assess":[198],"efficiency":[200,233],"each":[202],"methodology.":[203],"Overall":[204],"results":[205],"shows":[206],"that":[207],"proposed":[209,236],"not":[211],"only":[212],"accelerates":[213],"computation":[214],"but":[216],"also":[217],"enhances":[218],"classification":[220],"accuracy":[221,241],"models.":[225],"outcome":[227],"underscores":[231],"methodology":[237],"with":[238],"highest":[240],"99.61%":[243],"when":[244],"utilizing":[245],"RF":[246],"point":[250],"200":[252],"(1548":[253],"features).":[254]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
