{"id":"https://openalex.org/W4318147863","doi":"https://doi.org/10.1109/bigdata55660.2022.10020797","title":"Unsupervised Learning Approaches for Construction of Malware Families","display_name":"Unsupervised Learning Approaches for Construction of Malware Families","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4318147863","doi":"https://doi.org/10.1109/bigdata55660.2022.10020797"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020797","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020797","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061325731","display_name":"Alexis J. Jackson","orcid":null},"institutions":[{"id":"https://openalex.org/I204593131","display_name":"College of Charleston","ror":"https://ror.org/00390t168","country_code":"US","type":"education","lineage":["https://openalex.org/I204593131"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Alexis J. Jackson","raw_affiliation_strings":["College of Charleston South,Department of Computer Science,Carolina,US","Department of Computer Science, College of Charleston South, Carolina, US"],"affiliations":[{"raw_affiliation_string":"College of Charleston South,Department of Computer Science,Carolina,US","institution_ids":["https://openalex.org/I204593131"]},{"raw_affiliation_string":"Department of Computer Science, College of Charleston South, Carolina, US","institution_ids":["https://openalex.org/I204593131"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101771002","display_name":"Krishnendu Ghosh","orcid":"https://orcid.org/0000-0002-8471-6537"},"institutions":[{"id":"https://openalex.org/I204593131","display_name":"College of Charleston","ror":"https://ror.org/00390t168","country_code":"US","type":"education","lineage":["https://openalex.org/I204593131"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Krishnendu Ghosh","raw_affiliation_strings":["College of Charleston South,Department of Computer Science,Carolina,US","Department of Computer Science, College of Charleston South, Carolina, US"],"affiliations":[{"raw_affiliation_string":"College of Charleston South,Department of Computer Science,Carolina,US","institution_ids":["https://openalex.org/I204593131"]},{"raw_affiliation_string":"Department of Computer Science, College of Charleston South, Carolina, US","institution_ids":["https://openalex.org/I204593131"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5061325731"],"corresponding_institution_ids":["https://openalex.org/I204593131"],"apc_list":null,"apc_paid":null,"fwci":0.2455,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.45118483,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"2989","last_page":"2996"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/malware","display_name":"Malware","score":0.9404988288879395},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.782422661781311},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.6710984706878662},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5529836416244507},{"id":"https://openalex.org/keywords/malware-analysis","display_name":"Malware analysis","score":0.5339884161949158},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.5209542512893677},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5057158470153809},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4285547435283661},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4239957332611084},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36486440896987915},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.23590216040611267},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.20037898421287537}],"concepts":[{"id":"https://openalex.org/C541664917","wikidata":"https://www.wikidata.org/wiki/Q14001","display_name":"Malware","level":2,"score":0.9404988288879395},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.782422661781311},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.6710984706878662},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5529836416244507},{"id":"https://openalex.org/C2779395397","wikidata":"https://www.wikidata.org/wiki/Q15731404","display_name":"Malware analysis","level":3,"score":0.5339884161949158},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.5209542512893677},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5057158470153809},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4285547435283661},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4239957332611084},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36486440896987915},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.23590216040611267},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.20037898421287537},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020797","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020797","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1503224444","https://openalex.org/W1525451939","https://openalex.org/W1544837488","https://openalex.org/W1581009051","https://openalex.org/W1971421925","https://openalex.org/W2111038628","https://openalex.org/W2121749752","https://openalex.org/W2127048411","https://openalex.org/W2131523719","https://openalex.org/W2131681506","https://openalex.org/W2132874238","https://openalex.org/W2137365926","https://openalex.org/W2144112223","https://openalex.org/W2150795982","https://openalex.org/W2187910102","https://openalex.org/W2261354734","https://openalex.org/W2557513839","https://openalex.org/W2618599501","https://openalex.org/W2732916693","https://openalex.org/W2752929869","https://openalex.org/W2806678366","https://openalex.org/W2900633536","https://openalex.org/W2905912674","https://openalex.org/W2961929868","https://openalex.org/W2962688774","https://openalex.org/W2982427769","https://openalex.org/W3023279601","https://openalex.org/W3045322569","https://openalex.org/W3099354464","https://openalex.org/W3099768174","https://openalex.org/W3102641634","https://openalex.org/W4300972744","https://openalex.org/W6678051712","https://openalex.org/W6686901443","https://openalex.org/W6744657187","https://openalex.org/W6753153400","https://openalex.org/W6784472380"],"related_works":["https://openalex.org/W2768892939","https://openalex.org/W2469507153","https://openalex.org/W2008790809","https://openalex.org/W2134874482","https://openalex.org/W4285507391","https://openalex.org/W2397240470","https://openalex.org/W2602767565","https://openalex.org/W170652726","https://openalex.org/W2883822334","https://openalex.org/W3164408430"],"abstract_inverted_index":{"Construction":[0],"of":[1,7,15,18,25,38,70,73,82,84,94,130,132],"malware":[2,8,19,26,39,95,121,133,135],"families":[3,20,40],"from":[4,134],"behavioral":[5,16],"properties":[6,17],"in":[9,23,127],"wild":[10],"is":[11,21,33,76],"challenging.":[12],"The":[13,45,78,123],"understanding":[14],"key":[22],"detection":[24,90,129],"attacks.":[27],"In":[28],"this":[29],"work,":[30],"a":[31,105],"methodology":[32,101],"described":[34],"for":[35,92],"unsupervised":[36],"construction":[37,93],"using":[41],"two":[42],"phase":[43,47],"approach.":[44],"first":[46],"includes":[48],"natural":[49],"language":[50],"processing":[51],"techniques":[52],"such":[53],"as":[54],"term-frequency":[55],"and":[56,88,108],"inverse":[57],"document":[58],"frequency":[59],"are":[60,102,111,116,125],"applied":[61],"on":[62,104,118],"trace":[63,74],"data":[64],"to":[65],"compute":[66],"similarities.":[67],"A":[68],"graph":[69],"textual":[71],"similarities":[72],"sequences":[75],"constructed.":[77],"second":[79],"phase,":[80],"consists":[81],"application":[83],"minimum":[85],"spanning":[86],"tree":[87],"community":[89],"algorithms":[91,115],"families.":[96,122,136],"Experiments":[97],"employing":[98],"the":[99,109,119],"proposed":[100],"conducted":[103],"published":[106],"dataset":[107],"results":[110,124],"reported.":[112],"Machine":[113],"learning":[114],"evaluated":[117],"constructed":[120],"promising":[126],"automated":[128],"variants":[131]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
