{"id":"https://openalex.org/W4312813097","doi":"https://doi.org/10.1145/3558100.3563854","title":"Detecting malware using text documents extracted from spam email through machine learning","display_name":"Detecting malware using text documents extracted from spam email through machine learning","publication_year":2022,"publication_date":"2022-09-20","ids":{"openalex":"https://openalex.org/W4312813097","doi":"https://doi.org/10.1145/3558100.3563854"},"language":"en","primary_location":{"id":"doi:10.1145/3558100.3563854","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3558100.3563854","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd ACM Symposium on Document Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074504699","display_name":"Luis \u00c1ngel Redondo-Gutierrez","orcid":null},"institutions":[{"id":"https://openalex.org/I8833935","display_name":"Universidad de Le\u00f3n","ror":"https://ror.org/02tzt0b78","country_code":"ES","type":"education","lineage":["https://openalex.org/I8833935"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Luis \u00c1ngel Redondo-Gutierrez","raw_affiliation_strings":["Universidad de Le\u00f3n, Le\u00f3n, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad de Le\u00f3n, Le\u00f3n, Spain","institution_ids":["https://openalex.org/I8833935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056204391","display_name":"Francisco J\u00e1\u00f1ez-Martino","orcid":"https://orcid.org/0000-0001-7665-6418"},"institutions":[{"id":"https://openalex.org/I8833935","display_name":"Universidad de 
Le\u00f3n","ror":"https://ror.org/02tzt0b78","country_code":"ES","type":"education","lineage":["https://openalex.org/I8833935"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Francisco J\u00e1\u00f1ez-Martino","raw_affiliation_strings":["Universidad de Le\u00f3n, Le\u00f3n, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad de Le\u00f3n, Le\u00f3n, Spain","institution_ids":["https://openalex.org/I8833935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034818449","display_name":"Eduardo Fidalgo","orcid":"https://orcid.org/0000-0003-1202-5232"},"institutions":[{"id":"https://openalex.org/I8833935","display_name":"Universidad de Le\u00f3n","ror":"https://ror.org/02tzt0b78","country_code":"ES","type":"education","lineage":["https://openalex.org/I8833935"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Eduardo Fidalgo","raw_affiliation_strings":["Universidad de Le\u00f3n, Le\u00f3n, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad de Le\u00f3n, Le\u00f3n, Spain","institution_ids":["https://openalex.org/I8833935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020546469","display_name":"Enrique Alegre","orcid":"https://orcid.org/0000-0003-2081-774X"},"institutions":[{"id":"https://openalex.org/I8833935","display_name":"Universidad de Le\u00f3n","ror":"https://ror.org/02tzt0b78","country_code":"ES","type":"education","lineage":["https://openalex.org/I8833935"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Enrique Alegre","raw_affiliation_strings":["Universidad de Le\u00f3n, Le\u00f3n, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad de Le\u00f3n, Le\u00f3n, Spain","institution_ids":["https://openalex.org/I8833935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001440837","display_name":"V\u00edctor 
Gonz\u00e1lez-Castro","orcid":"https://orcid.org/0000-0001-8742-3775"},"institutions":[{"id":"https://openalex.org/I8833935","display_name":"Universidad de Le\u00f3n","ror":"https://ror.org/02tzt0b78","country_code":"ES","type":"education","lineage":["https://openalex.org/I8833935"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"V\u00edctor Gonz\u00e1lez-Castro","raw_affiliation_strings":["Universidad de Le\u00f3n, Le\u00f3n, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad de Le\u00f3n, Le\u00f3n, Spain","institution_ids":["https://openalex.org/I8833935"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021679576","display_name":"Roc\u00edo Al\u00e1iz-Rodr\u00edguez","orcid":"https://orcid.org/0000-0003-4164-5887"},"institutions":[{"id":"https://openalex.org/I8833935","display_name":"Universidad de Le\u00f3n","ror":"https://ror.org/02tzt0b78","country_code":"ES","type":"education","lineage":["https://openalex.org/I8833935"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Roc\u00edo Alaiz-Rodr\u00edguez","raw_affiliation_strings":["Universidad de Le\u00f3n, Le\u00f3n, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad de Le\u00f3n, Le\u00f3n, Spain","institution_ids":["https://openalex.org/I8833935"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5074504699"],"corresponding_institution_ids":["https://openalex.org/I8833935"],"apc_list":null,"apc_paid":null,"fwci":0.606,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.75072651,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing 
Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/malware","display_name":"Malware","score":0.8751633167266846},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7860926389694214},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.7312685251235962},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.6325284242630005},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial 
intelligence","score":0.6288707256317139},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.5842862129211426},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5377756357192993},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4342935383319855},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3699136972427368},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.27958738803863525},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.26261335611343384},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.07281231880187988}],"concepts":[{"id":"https://openalex.org/C541664917","wikidata":"https://www.wikidata.org/wiki/Q14001","display_name":"Malware","level":2,"score":0.8751633167266846},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7860926389694214},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.7312685251235962},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.6325284242630005},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6288707256317139},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.5842862129211426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine 
learning","level":1,"score":0.5377756357192993},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4342935383319855},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3699136972427368},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.27958738803863525},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26261335611343384},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07281231880187988},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3558100.3563854","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3558100.3563854","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd ACM Symposium on Document Engineering","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong 
institutions","score":0.550000011920929,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2497967381","https://openalex.org/W2498119267","https://openalex.org/W2768015477","https://openalex.org/W2797672282","https://openalex.org/W2905912674","https://openalex.org/W2942687513","https://openalex.org/W2991305957","https://openalex.org/W3022844159","https://openalex.org/W3025488764","https://openalex.org/W3133178107","https://openalex.org/W3184796286","https://openalex.org/W4210864560","https://openalex.org/W4213145421","https://openalex.org/W4226443146","https://openalex.org/W4280649903"],"related_works":["https://openalex.org/W2382433580","https://openalex.org/W2100326285","https://openalex.org/W2097492617","https://openalex.org/W2369751049","https://openalex.org/W2753240997","https://openalex.org/W1764168690","https://openalex.org/W2537959205","https://openalex.org/W2198237484","https://openalex.org/W2041122820","https://openalex.org/W2324052717"],"abstract_inverted_index":{"Spam":[0],"has":[1],"become":[2],"an":[3],"effective":[4],"way":[5],"for":[6,21,59,76],"cybercriminals":[7],"to":[8,23,119],"spread":[9],"malware.":[10],"Although":[11],"cybersecurity":[12],"agencies":[13],"and":[14,18,41,98,116],"companies":[15],"develop":[16],"products":[17],"organise":[19],"courses":[20],"people":[22],"detect":[24,120],"malicious":[25],"spam":[26,29,63],"email":[27],"patterns,":[28],"attacks":[30],"are":[31],"not":[32],"totally":[33],"avoided":[34],"yet.":[35],"In":[36],"this":[37],"work,":[38],"we":[39,89],"present":[40],"make":[42],"publicly":[43],"available":[44],"\"Spam":[45],"Email":[46],"Malware":[47],"Detection":[48],"-":[49],"600\"":[50],"(SEMD-600),":[51],"a":[52,74,139],"new":[53],"dataset,":[54],"based":[55,79],"on":[56,80],"Bruce":[57],"Guenter's,":[58],"malware":[60,77,121],"detection":[61,78],"in":[62,104,122],"u
sing":[64],"only":[65],"the":[66,69,91,135],"text":[67,92,124],"of":[68,96,143],"email.":[70],"We":[71,126],"also":[72],"introduce":[73],"pipeline":[75],"traditional":[81],"Natural":[82],"Language":[83],"Processing":[84],"(NLP)":[85],"techniques.":[86],"Using":[87],"SEMD-600,":[88],"compare":[90],"representation":[93],"techniques":[94],"Bag":[95],"Words":[97],"Term":[99],"Frequency-Inverse":[100],"Document":[101],"Frequency":[102],"(TF-IDF),":[103],"combination":[105],"with":[106,131,138],"three":[107],"different":[108],"supervised":[109],"classifiers:":[110],"Support":[111],"Vector":[112],"Machine,":[113],"Naive":[114],"Bayes":[115],"Logistic":[117,132],"Regression,":[118],"plain":[123],"documents.":[125],"found":[127],"that":[128],"combining":[129],"TF-IDF":[130],"Regression":[133],"achieved":[134],"best":[136],"performance,":[137],"macro":[140],"F1":[141],"score":[142],"0.763.":[144]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
