{"id":"https://openalex.org/W2909060231","doi":"https://doi.org/10.1109/aiccsa.2018.8612855","title":"Malicious HTML File Prediction: A Detection and Classification Perspective with Noisy Data","display_name":"Malicious HTML File Prediction: A Detection and Classification Perspective with Noisy Data","publication_year":2018,"publication_date":"2018-10-01","ids":{"openalex":"https://openalex.org/W2909060231","doi":"https://doi.org/10.1109/aiccsa.2018.8612855","mag":"2909060231"},"language":"en","primary_location":{"id":"doi:10.1109/aiccsa.2018.8612855","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aiccsa.2018.8612855","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/ACS 15th International Conference on Computer Systems and Applications (AICCSA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102843280","display_name":"Samuel T. Hess","orcid":"https://orcid.org/0000-0003-2359-6540"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Samuel Hess","raw_affiliation_strings":["NSF Center for Cloud and Autonomic Computing, University of Arizona"],"affiliations":[{"raw_affiliation_string":"NSF Center for Cloud and Autonomic Computing, University of Arizona","institution_ids":["https://openalex.org/I138006243"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028622409","display_name":"Pratik Satam","orcid":"https://orcid.org/0000-0003-3139-8333"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pratik Satam","raw_affiliation_strings":["NSF Center for Cloud and Autonomic Computing, University of Arizona"],"affiliations":[{"raw_affiliation_string":"NSF Center for Cloud and Autonomic Computing, University of Arizona","institution_ids":["https://openalex.org/I138006243"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079638101","display_name":"Gregory Ditzler","orcid":"https://orcid.org/0000-0001-6890-0935"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gregory Ditzler","raw_affiliation_strings":["NSF Center for Cloud and Autonomic Computing, University of Arizona"],"affiliations":[{"raw_affiliation_string":"NSF Center for Cloud and Autonomic Computing, University of Arizona","institution_ids":["https://openalex.org/I138006243"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057335897","display_name":"Salim Hariri","orcid":"https://orcid.org/0000-0003-3956-3401"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Salim Hariri","raw_affiliation_strings":["NSF Center for Cloud and Autonomic Computing, University of Arizona"],"affiliations":[{"raw_affiliation_string":"NSF Center for Cloud and Autonomic Computing, University of Arizona","institution_ids":["https://openalex.org/I138006243"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102843280"],"corresponding_institution_ids":["https://openalex.org/I138006243"],"apc_list":null,"apc_paid":null,"fwci":0.1845,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.58187647,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8293774724006653},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.5947319269180298},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5620722770690918},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5435625314712524},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5206322073936462},{"id":"https://openalex.org/keywords/adaboost","display_name":"AdaBoost","score":0.5114313960075378},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.44530150294303894},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.42595988512039185},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4005235731601715},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.13107043504714966}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8293774724006653},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.5947319269180298},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5620722770690918},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5435625314712524},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5206322073936462},{"id":"https://openalex.org/C141404830","wikidata":"https://www.wikidata.org/wiki/Q2823869","display_name":"AdaBoost","level":3,"score":0.5114313960075378},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.44530150294303894},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42595988512039185},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4005235731601715},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.13107043504714966},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aiccsa.2018.8612855","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aiccsa.2018.8612855","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/ACS 15th International Conference on Computer Systems and Applications (AICCSA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W142212369","https://openalex.org/W236346275","https://openalex.org/W1475084778","https://openalex.org/W1532325895","https://openalex.org/W1662713651","https://openalex.org/W1988790447","https://openalex.org/W2011778831","https://openalex.org/W2024046085","https://openalex.org/W2026054276","https://openalex.org/W2044675702","https://openalex.org/W2129629259","https://openalex.org/W2135107501","https://openalex.org/W2148143831","https://openalex.org/W2161920802","https://openalex.org/W2261117743","https://openalex.org/W2296719434","https://openalex.org/W2330820318","https://openalex.org/W2342408547","https://openalex.org/W2460383888","https://openalex.org/W2564566221","https://openalex.org/W2604501628","https://openalex.org/W2626431566","https://openalex.org/W2763556600","https://openalex.org/W2902792801","https://openalex.org/W2911964244","https://openalex.org/W3016607949","https://openalex.org/W3085162807","https://openalex.org/W4248437541","https://openalex.org/W6609067833","https://openalex.org/W6628540086"],"related_works":["https://openalex.org/W4362588981","https://openalex.org/W4293525103","https://openalex.org/W4292869357","https://openalex.org/W3200179079","https://openalex.org/W2911198546","https://openalex.org/W4249229055","https://openalex.org/W4375930479","https://openalex.org/W3045454035","https://openalex.org/W4281616679","https://openalex.org/W4361732474"],"abstract_inverted_index":{"Cybersecurity":[0],"plays":[1],"a":[2,82,101,110,156,175,191,219,234,239,300,312],"critical":[3],"role":[4],"in":[5,22,27,104,186],"protecting":[6],"sensitive":[7],"information":[8],"and":[9,36,47,143,199,215,218,282],"the":[10,31,37,45,58,79,85,88,97,105,228,249,257,263,280,289,294,303],"structural":[11],"integrity":[12],"of":[13,33,49,72,87,96,168,242,302],"networked":[14,17],"systems.":[15],"As":[16],"systems":[18,68],"continue":[19],"to":[20,108,119,189,248,272,288],"expand":[21],"numbers":[23],"as":[24,26,55,57],"well":[25,56,117],"complexity,":[28],"so":[29],"does":[30],"threat":[32],"malicious":[34,53,61,129,137,169,179,243,283,290,304,313],"activity":[35,62],"necessity":[38],"for":[39,69,94,178],"advanced":[40],"cybersecurity":[41,165],"solutions.":[42],"Furthermore,":[43],"both":[44,279],"quantity":[46],"quality":[48,81],"available":[50],"data":[51,80,89,106],"on":[52,233,278],"content":[54],"fact":[59],"that":[60,159,183,237,256,274,299],"continuously":[63],"evolves":[64],"makes":[65],"automated":[66],"protection":[67],"this":[70,187],"type":[71],"environment":[73],"particularly":[74],"challenging.":[75],"Not":[76],"only":[77],"is":[78,127,140,160],"concern,":[83],"but":[84],"volume":[86],"can":[90,144],"be":[91,145],"quite":[92,146],"small":[93],"some":[95],"classes.":[98],"This":[99,153],"creates":[100],"class":[102,122],"imbalance":[103],"used":[107],"train":[109,277],"classifier;":[111],"however,":[112],"many":[113],"classifiers":[114,209,230],"are":[115,285],"not":[116,309],"equipped":[118],"deal":[120],"with":[121,207,246],"imbalance.":[123],"One":[124],"such":[125],"example":[126],"detecting":[128],"HMTL":[130,138],"files":[131,139,150,244,251],"from":[132,148,311],"static":[133],"features.":[134],"Unfortunately,":[135],"collecting":[136],"extremely":[141],"difficult":[142],"noisy":[147],"HTML":[149,170,180],"being":[151],"mislabeled.":[152],"paper":[154],"evaluates":[155],"specific":[157],"application":[158],"afflicted":[161],"by":[162,297],"these":[163],"modern":[164],"challenges:":[166],"detection":[167,221],"files.":[171],"Previous":[172],"work":[173,188],"presented":[174],"general":[176],"framework":[177,259],"file":[181],"classification":[182],"we":[184],"modify":[185],"use":[190],"\u03c7":[192],"<sup":[193],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[194],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>":[195],"feature":[196],"selection":[197],"technique":[198,203],"synthetic":[200],"minority":[201],"oversampling":[202],"(SMOTE).":[204],"We":[205,226,292],"experiment":[206],"different":[208,229],"(i.e.,":[210,223],"AdaBoost,":[211],"Gentle-Boost,":[212],"RobustBoost,":[213],"RusBoost,":[214],"Random":[216],"Forest)":[217],"pure":[220],"model":[222],"Isolation":[224],"Forest).":[225],"benchmark":[227],"using":[231],"SMOTE":[232],"real":[235],"dataset":[236],"contains":[238],"limited":[240],"number":[241],"(40)":[245],"respect":[247],"normal":[250,281],"(7,263).":[252],"It":[253],"was":[254,270],"found":[255,271],"modified":[258],"performed":[260],"better":[261],"than":[262],"previous":[264],"framework's":[265],"results.":[266],"However,":[267],"additional":[268],"evidence":[269],"imply":[273],"algorithms":[275],"which":[276],"samples":[284],"likely":[286,295],"overtraining":[287,296],"distribution.":[291],"demonstrate":[293],"determining":[298],"subset":[301],"files,":[305],"while":[306],"suspicious,":[307],"did":[308],"come":[310],"source.":[314]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
