{"id":"https://openalex.org/W4411523109","doi":"https://doi.org/10.1145/3728886","title":"ALMOND: Learning an Assembly Language Model for 0-Shot Code Obfuscation Detection","display_name":"ALMOND: Learning an Assembly Language Model for 0-Shot Code Obfuscation Detection","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4411523109","doi":"https://doi.org/10.1145/3728886"},"language":"en","primary_location":{"id":"doi:10.1145/3728886","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3728886","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3728886","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058972082","display_name":"Xuezixiang Li","orcid":"https://orcid.org/0009-0005-9713-3815"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xuezixiang Li","raw_affiliation_strings":["University of California at Riverside, Riverside, USA"],"raw_orcid":"https://orcid.org/0009-0005-9713-3815","affiliations":[{"raw_affiliation_string":"University of California at Riverside, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Sheng Yu","orcid":"https://orcid.org/0000-0002-5189-7140"},"institutions":[{"id":"https://openalex.org/I4210157613","display_name":"Riverside Technology (United States)","ror":"https://ror.org/05ey1mh42","country_code":"US","type":"company","lineage":["https://openalex.org/I4210157613"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheng Yu","raw_affiliation_strings":["Deepbits Technology, Riverside, USA"],"raw_orcid":"https://orcid.org/0000-0002-5189-7140","affiliations":[{"raw_affiliation_string":"Deepbits Technology, Riverside, USA","institution_ids":["https://openalex.org/I4210157613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073376805","display_name":"Heng Yin","orcid":"https://orcid.org/0000-0002-8942-7742"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heng Yin","raw_affiliation_strings":["University of California at Riverside, Riverside, USA"],"raw_orcid":"https://orcid.org/0000-0002-8942-7742","affiliations":[{"raw_affiliation_string":"University of California at Riverside, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5058972082"],"corresponding_institution_ids":["https://openalex.org/I103635307"],"apc_list":null,"apc_paid":null,"fwci":1.1416,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77831289,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"2","issue":"ISSTA","first_page":"366","last_page":"387"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/obfuscation","display_name":"Obfuscation","score":0.9281275868415833},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8135555982589722},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6470977067947388},{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.6356903314590454},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.631607174873352},{"id":"https://openalex.org/keywords/opcode","display_name":"Opcode","score":0.587118923664093},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4855915904045105},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4849410355091095},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.4280087947845459},{"id":"https://openalex.org/keywords/assembly-language","display_name":"Assembly language","score":0.42123907804489136},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.41819655895233154},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4147747755050659},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.41181015968322754},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.40145522356033325},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3847198188304901},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.2876077890396118},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16235238313674927},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.15264096856117249},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07587593793869019}],"concepts":[{"id":"https://openalex.org/C40305131","wikidata":"https://www.wikidata.org/wiki/Q2616305","display_name":"Obfuscation","level":2,"score":0.9281275868415833},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8135555982589722},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6470977067947388},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.6356903314590454},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.631607174873352},{"id":"https://openalex.org/C52173422","wikidata":"https://www.wikidata.org/wiki/Q766483","display_name":"Opcode","level":2,"score":0.587118923664093},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4855915904045105},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4849410355091095},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.4280087947845459},{"id":"https://openalex.org/C50831359","wikidata":"https://www.wikidata.org/wiki/Q165436","display_name":"Assembly language","level":3,"score":0.42123907804489136},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.41819655895233154},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4147747755050659},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.41181015968322754},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.40145522356033325},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3847198188304901},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2876077890396118},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16235238313674927},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.15264096856117249},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07587593793869019},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3728886","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3728886","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3728886","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3728886","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1516836232","https://openalex.org/W1565441035","https://openalex.org/W2024170198","https://openalex.org/W2076758681","https://openalex.org/W2144211451","https://openalex.org/W2150423842","https://openalex.org/W2282866165","https://openalex.org/W2314464932","https://openalex.org/W2525778437","https://openalex.org/W2560204201","https://openalex.org/W2767094836","https://openalex.org/W2810872891","https://openalex.org/W2885072546","https://openalex.org/W2887787078","https://openalex.org/W2901689459","https://openalex.org/W2926178846","https://openalex.org/W2945413494","https://openalex.org/W2947883149","https://openalex.org/W2962830343","https://openalex.org/W2963804400","https://openalex.org/W2992467173","https://openalex.org/W2997915791","https://openalex.org/W3003243900","https://openalex.org/W3005641848","https://openalex.org/W3089028909","https://openalex.org/W3089412163","https://openalex.org/W3129166376","https://openalex.org/W3133719257","https://openalex.org/W3194813479","https://openalex.org/W3198685994","https://openalex.org/W4226128225","https://openalex.org/W4231934124","https://openalex.org/W4234480284","https://openalex.org/W4247464060","https://openalex.org/W4281385582","https://openalex.org/W4281744374","https://openalex.org/W4285271823","https://openalex.org/W4285586654","https://openalex.org/W4308632257","https://openalex.org/W4311166089","https://openalex.org/W4313203541","https://openalex.org/W4362615084","https://openalex.org/W4391887109","https://openalex.org/W4398239361","https://openalex.org/W4401113920","https://openalex.org/W4401537194"],"related_works":["https://openalex.org/W3211159634","https://openalex.org/W4388157251","https://openalex.org/W3126761238","https://openalex.org/W4214835142","https://openalex.org/W2511120801","https://openalex.org/W2947729775","https://openalex.org/W982030367","https://openalex.org/W2008514616","https://openalex.org/W2498457261","https://openalex.org/W4392639644"],"abstract_inverted_index":{"Code":[0],"obfuscation":[1,81,94,176],"is":[2,135],"a":[3,87,115,141],"technique":[4],"used":[5],"to":[6,13,124,151,159],"protect":[7],"software":[8],"by":[9,130],"making":[10],"it":[11,19,113,185],"difficult":[12],"understand":[14],"and":[15,47,60,72,77,191,201],"reverse":[16],"engineer.":[17],"However,":[18,51],"can":[20],"also":[21],"be":[22,206],"exploited":[23],"for":[24,91,110],"malicious":[25,33],"purposes":[26],"such":[27],"as":[28,140],"code":[29,93,123,202],"plagiarism":[30],"or":[31],"developing":[32],"programs.":[34],"Learning-based":[35],"techniques":[36],"have":[37],"achieved":[38],"great":[39],"success":[40],"with":[41,54],"the":[42,126,136,148,194],"help":[43],"of":[44,80,138,165,189,203],"supervised":[45,64,100,179],"learning":[46,65,101],"labeled":[48,107],"training":[49],"sets.":[50],"when":[52,74],"faced":[53],"real-life":[55],"environments":[56],"involving":[57],"privately":[58],"developed":[59],"undisclosed":[61],"obfuscators,":[62],"these":[63],"methods":[66],"often":[67],"raise":[68],"concerns":[69],"about":[70],"generalizability":[71],"robustness":[73],"facing":[75],"unseen":[76,175],"unknown":[78],"classes":[79],"techniques.":[82],"This":[83],"paper":[84],"presents":[85],"ALMOND,":[86],"novel":[88],"zero-shot":[89],"approach":[90],"detecting":[92],"in":[95],"binary":[96],"executables.":[97],"Unlike":[98],"previous":[99],"methods,":[102,177],"ALMOND":[103,170],"does":[104],"not":[105],"require":[106],"obfuscated":[108,166],"samples":[109],"training.":[111],"Instead,":[112],"leverages":[114],"language":[116],"model":[117,149],"pre-trained":[118,199],"only":[119],"on":[120,146,174],"unobfuscated":[121],"assembly":[122],"identify":[125],"linguistic":[127],"deviations":[128],"introduced":[129],"obfuscation.":[131],"The":[132],"key":[133],"innovation":[134],"use":[137],"\u201derror-perplexity\u201d":[139],"detection":[142],"metric,":[143],"which":[144],"focuses":[145],"tokens":[147],"fails":[150],"predict.":[152],"Continuous":[153],"Error":[154],"Perplexity":[155],"further":[156],"enhances":[157],"this":[158],"capture":[160],"consecutive":[161],"prediction":[162],"errors":[163],"characteristic":[164],"sequences.":[167],"Experiments":[168],"show":[169],"achieves":[171],"96.3%":[172],"accuracy":[173],"outperforming":[178],"baselines.":[180],"On":[181],"real-world":[182],"malware":[183],"samples,":[184],"demonstrates":[186],"an":[187],"AUC":[188],"0.869":[190],"significantly":[192],"outperforms":[193],"supervise-learning":[195],"baseline.":[196],"Our":[197],"Dataset,":[198],"model,":[200],"evaluation":[204],"will":[205],"available":[207],"at":[208],"https://github.com/palmtreemodel/ALMOND":[209]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
