{"id":"https://openalex.org/W4205567238","doi":"https://doi.org/10.1109/bigdata52589.2021.9671886","title":"Malware Generation with Specific Behaviors to Improve Machine Learning-based Detection","display_name":"Malware Generation with Specific Behaviors to Improve Machine Learning-based Detection","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4205567238","doi":"https://doi.org/10.1109/bigdata52589.2021.9671886"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata52589.2021.9671886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671886","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066607086","display_name":"Michael R. Smtith","orcid":null},"institutions":[{"id":"https://openalex.org/I4210104735","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210104735"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael R. Smtith","raw_affiliation_strings":["Sandia National Laboratories, Albuquerque, NM, USA"],"affiliations":[{"raw_affiliation_string":"Sandia National Laboratories, Albuquerque, NM, USA","institution_ids":["https://openalex.org/I4210104735"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074294924","display_name":"Stephen Verzi","orcid":"https://orcid.org/0000-0003-2152-851X"},"institutions":[{"id":"https://openalex.org/I4210104735","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210104735"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephen J. Verzi","raw_affiliation_strings":["Sandia National Laboratories, Albuquerque, NM, USA"],"affiliations":[{"raw_affiliation_string":"Sandia National Laboratories, Albuquerque, NM, USA","institution_ids":["https://openalex.org/I4210104735"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102025929","display_name":"Nicholas Johnson","orcid":"https://orcid.org/0009-0006-0366-420X"},"institutions":[{"id":"https://openalex.org/I4401726927","display_name":"Cerebras Systems (United States)","ror":"https://ror.org/040zz8080","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726927"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nicholas T. Johnson","raw_affiliation_strings":["Cerebras Systems Inc., Los Altos, CA, USA"],"affiliations":[{"raw_affiliation_string":"Cerebras Systems Inc., Los Altos, CA, USA","institution_ids":["https://openalex.org/I4401726927"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065917738","display_name":"Xin Zhou","orcid":"https://orcid.org/0000-0003-4015-4787"},"institutions":[{"id":"https://openalex.org/I4210104735","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210104735"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xin Zhou","raw_affiliation_strings":["Sandia National Laboratories, Albuquerque, NM, USA"],"affiliations":[{"raw_affiliation_string":"Sandia National Laboratories, Albuquerque, NM, USA","institution_ids":["https://openalex.org/I4210104735"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013971052","display_name":"Kanad Khanna","orcid":null},"institutions":[{"id":"https://openalex.org/I4210104735","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210104735"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kanad Khanna","raw_affiliation_strings":["Sandia National Laboratories, Albuquerque, NM, USA"],"affiliations":[{"raw_affiliation_string":"Sandia National Laboratories, Albuquerque, NM, USA","institution_ids":["https://openalex.org/I4210104735"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035366463","display_name":"Sophie Quynn","orcid":null},"institutions":[{"id":"https://openalex.org/I192454743","display_name":"Sandia National Laboratories California","ror":"https://ror.org/058m7ey48","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1330989302","https://openalex.org/I192454743","https://openalex.org/I198811213","https://openalex.org/I198811213","https://openalex.org/I4210104735"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sophie Quynn","raw_affiliation_strings":["Sandia National Laboratories, Livermore, CA, USA"],"affiliations":[{"raw_affiliation_string":"Sandia National Laboratories, Livermore, CA, USA","institution_ids":["https://openalex.org/I192454743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067044074","display_name":"Raga Krishnakumar","orcid":"https://orcid.org/0000-0002-2479-754X"},"institutions":[{"id":"https://openalex.org/I192454743","display_name":"Sandia National Laboratories California","ror":"https://ror.org/058m7ey48","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1330989302","https://openalex.org/I192454743","https://openalex.org/I198811213","https://openalex.org/I198811213","https://openalex.org/I4210104735"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Raga Krishnakumar","raw_affiliation_strings":["Sandia National Laboratories, Livermore, CA, USA"],"affiliations":[{"raw_affiliation_string":"Sandia National Laboratories, Livermore, CA, USA","institution_ids":["https://openalex.org/I192454743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5066607086"],"corresponding_institution_ids":["https://openalex.org/I4210104735"],"apc_list":null,"apc_paid":null,"fwci":0.172,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.40228341,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"2160","last_page":"2169"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/malware","display_name":"Malware","score":0.8822083473205566},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8003228306770325},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.6772326231002808},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6622006893157959},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6395366787910461},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5074753165245056},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4900587797164917},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.43766412138938904},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4060162305831909},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3598710596561432},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33645710349082947},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.2776527404785156},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.17513102293014526}],"concepts":[{"id":"https://openalex.org/C541664917","wikidata":"https://www.wikidata.org/wiki/Q14001","display_name":"Malware","level":2,"score":0.8822083473205566},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8003228306770325},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.6772326231002808},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6622006893157959},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6395366787910461},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5074753165245056},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4900587797164917},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.43766412138938904},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4060162305831909},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3598710596561432},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33645710349082947},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.2776527404785156},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.17513102293014526},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata52589.2021.9671886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671886","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320338291","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W1583912456","https://openalex.org/W2010065958","https://openalex.org/W2018367337","https://openalex.org/W2148143831","https://openalex.org/W2194775991","https://openalex.org/W2293458749","https://openalex.org/W2518866423","https://openalex.org/W2567070169","https://openalex.org/W2612449038","https://openalex.org/W2744095836","https://openalex.org/W2784652921","https://openalex.org/W2788864200","https://openalex.org/W2819579046","https://openalex.org/W2892210823","https://openalex.org/W2909482470","https://openalex.org/W2920684403","https://openalex.org/W2945404328","https://openalex.org/W2954416745","https://openalex.org/W2962770929","https://openalex.org/W2962911061","https://openalex.org/W2963165251","https://openalex.org/W2963973118","https://openalex.org/W2966316219","https://openalex.org/W2977721195","https://openalex.org/W2982631433","https://openalex.org/W3000425587","https://openalex.org/W3015889445","https://openalex.org/W3034720584","https://openalex.org/W3045755384","https://openalex.org/W3096432474","https://openalex.org/W3099088591","https://openalex.org/W3100615802","https://openalex.org/W3118382796","https://openalex.org/W3175941285","https://openalex.org/W4289761690","https://openalex.org/W4293568472","https://openalex.org/W4295312788","https://openalex.org/W4297747285","https://openalex.org/W4297779379","https://openalex.org/W4300687693","https://openalex.org/W6635084905","https://openalex.org/W6685802191","https://openalex.org/W6731370813","https://openalex.org/W6745899033","https://openalex.org/W6748641434","https://openalex.org/W6750318962","https://openalex.org/W6752910514","https://openalex.org/W6755176163","https://openalex.org/W6760031732","https://openalex.org/W6762067811","https://openalex.org/W6766978945","https://openalex.org/W6779093361","https://openalex.org/W6780507278","https://openalex.org/W6781482849","https://openalex.org/W6786797400"],"related_works":["https://openalex.org/W1975357770","https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4226316650","https://openalex.org/W4380551139","https://openalex.org/W4317695495","https://openalex.org/W4287117424","https://openalex.org/W4387506531","https://openalex.org/W4238433571","https://openalex.org/W2967848559"],"abstract_inverted_index":{"We":[0],"describe":[1],"efforts":[2],"in":[3,27,46,100,109,147,191],"generating":[4],"synthetic":[5,177],"malware":[6,48,77,102,118,168,193],"samples":[7],"that":[8,12,35,42,51,113,130,176],"have":[9],"specified":[10],"behaviors":[11,26,33,40,56,71,87,111,190],"can":[13],"then":[14],"be":[15],"used":[16],"to":[17,24,54,90,181,196,203],"train":[18],"a":[19,36,156,163],"machine":[20],"learning":[21],"(ML)":[22],"algorithm":[23],"detect":[25,55],"malware.":[28,63],"The":[29],"idea":[30],"behind":[31],"detecting":[32,70],"is":[34,72,112,133,179],"set":[37],"of":[38,61,69,106,144,189],"core":[39],"exists":[41],"are":[43,116],"often":[44],"shared":[45],"many":[47,101],"variants":[49],"and":[50,154,161,185,202],"being":[52],"able":[53,180],"will":[57],"improve":[58],"the":[59,66,91,107,142,187],"detection":[60,103],"novel":[62,192],"However,":[64],"empirically":[65],"multi-label":[67],"task":[68],"significantly":[73],"more":[74],"difficult":[75],"than":[76,93],"classification,":[78],"only":[79],"achieving":[80],"on":[81],"average":[82],"84%":[83],"accuracy":[84,98],"across":[85],"all":[86],"as":[88],"opposed":[89],"greater":[92],"95%":[94],"multi-class":[95],"or":[96],"binary":[97],"reported":[99],"studies.":[104],"One":[105],"difficulties":[108],"identifying":[110],"while":[114],"there":[115,132],"ample":[117],"samples,":[119],"most":[120],"data":[121,136,178],"sources":[122],"do":[123],"not":[124],"include":[125],"behavioral":[126,183],"labels,":[127],"which":[128],"means":[129],"generally":[131],"insufficient":[134],"training":[135],"for":[137,167],"behavior":[138,171],"identification.":[139],"Inspired":[140],"by":[141],"success":[143],"generative":[145,165],"models":[146],"improving":[148],"image":[149],"processing":[150],"techniques,":[151],"we":[152],"examine":[153],"extend":[155],"1)":[157],"conditional":[158],"variational":[159],"auto-encoder":[160],"2)":[162],"flow-based":[164],"model":[166],"generation":[169],"with":[170,205],"labels.":[172],"Initial":[173],"experiments":[174],"indicate":[175],"capture":[182],"information":[184],"increase":[186],"recall":[188],"from":[194],"32%":[195],"45%":[197],"without":[198],"increasing":[199],"false":[200,207],"positives":[201],"52%":[204],"increased":[206],"positives.":[208]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
