{"id":"https://openalex.org/W3133719257","doi":"https://doi.org/10.1145/3460120.3484587","title":"PalmTree: Learning an Assembly Language Model for Instruction Embedding","display_name":"PalmTree: Learning an Assembly Language Model for Instruction Embedding","publication_year":2021,"publication_date":"2021-11-12","ids":{"openalex":"https://openalex.org/W3133719257","doi":"https://doi.org/10.1145/3460120.3484587","mag":"3133719257"},"language":"en","primary_location":{"id":"doi:10.1145/3460120.3484587","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3460120.3484587","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3460120.3484587","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3460120.3484587","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Xuezixiang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xuezixiang Li","raw_affiliation_strings":["University of California, Riverside, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Riverside, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yu Qu","orcid":null},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Qu","raw_affiliation_strings":["University of California, Riverside, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Riverside, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"last","author":{"id":null,"display_name":"Heng Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heng Yin","raw_affiliation_strings":["University of California, Riverside, Riverside, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Riverside, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I103635307"],"apc_list":null,"apc_paid":null,"fwci":22.1975,"has_fulltext":true,"cited_by_count":139,"citation_normalized_percentile":{"value":0.99495731,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3236","last_page":"3251"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6769000291824341},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5511000156402588},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.5449000000953674},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5284000039100647},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.49230000376701355},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4650999903678894},{"id":"https://openalex.org/keywords/assembly-language","display_name":"Assembly language","score":0.4219000041484833},{"id":"https://openalex.org/keywords/binary-code","display_name":"Binary code","score":0.4165000021457672},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4101000130176544}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8137999773025513},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6769000291824341},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5618000030517578},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5511000156402588},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.5449000000953674},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5284000039100647},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.49230000376701355},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4650999903678894},{"id":"https://openalex.org/C50831359","wikidata":"https://www.wikidata.org/wiki/Q165436","display_name":"Assembly language","level":3,"score":0.4219000041484833},{"id":"https://openalex.org/C63435697","wikidata":"https://www.wikidata.org/wiki/Q864135","display_name":"Binary code","level":3,"score":0.4165000021457672},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4101000130176544},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.40939998626708984},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.39410001039505005},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3855000138282776},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38040000200271606},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.36880001425743103},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.3610000014305115},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.3495999872684479},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3255999982357025},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3098999857902527},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.2969000041484833},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C77660490","wikidata":"https://www.wikidata.org/wiki/Q244916","display_name":"Intermediate language","level":3,"score":0.26159998774528503},{"id":"https://openalex.org/C98183937","wikidata":"https://www.wikidata.org/wiki/Q2112188","display_name":"Program analysis","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3460120.3484587","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3460120.3484587","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3460120.3484587","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2103.03809","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.03809","pdf_url":"https://arxiv.org/pdf/2103.03809","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3460120.3484587","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3460120.3484587","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3460120.3484587","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G25081","display_name":null,"funder_award_id":"No. 1719175","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G422237721","display_name":null,"funder_award_id":"No. N00014-17-1-2893","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G4504108201","display_name":null,"funder_award_id":"N00014-17-1","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G4751570384","display_name":null,"funder_award_id":"4-17-1-","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G5972417660","display_name":null,"funder_award_id":"1719175","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G637290822","display_name":null,"funder_award_id":"N00014-17-1-289","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3133719257.pdf","grobid_xml":"https://content.openalex.org/works/W3133719257.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W2142403498","https://openalex.org/W2511507028","https://openalex.org/W2749008552","https://openalex.org/W2886694146","https://openalex.org/W2888320512","https://openalex.org/W2901689459","https://openalex.org/W2926178846","https://openalex.org/W2962739339","https://openalex.org/W2962784628","https://openalex.org/W2963935794","https://openalex.org/W2964110616","https://openalex.org/W2970971581","https://openalex.org/W2979566992","https://openalex.org/W2997915791","https://openalex.org/W2998704965","https://openalex.org/W3011564318","https://openalex.org/W3011574394","https://openalex.org/W6600175266"],"related_works":[],"abstract_inverted_index":{"Deep":[0],"learning":[1,28,66],"has":[2,181],"demonstrated":[3],"its":[4],"strengths":[5],"in":[6,58,96,155],"numerous":[7],"binary":[8,15,30,135],"analysis":[9,31],"tasks,":[10,32],"including":[11],"function":[12,18],"boundary":[13],"detection,":[14],"code":[16],"search,":[17],"prototype":[19],"inference,":[20],"value":[21],"set":[22],"analysis,":[23],"etc.":[24],"When":[25],"applying":[26],"deep":[27],"to":[29,35,51,54,76,115,142,161],"we":[33,49,113],"need":[34,50],"decide":[36],"what":[37],"input":[38],"should":[39],"be":[40,105],"fed":[41],"into":[42],"the":[43,72,78,86,98,153,182,190],"neural":[44],"network":[45],"model.":[46],"More":[47],"specifically,":[48],"answer":[52],"how":[53],"represent":[55],"an":[56,117],"instruction":[57,67,126,177,192],"a":[59],"fixed-length":[60],"vector.":[61],"The":[62],"idea":[63],"of":[64,81,146],"automatically":[65],"representations":[68],"is":[69,101],"intriguing,":[70],"but":[71],"existing":[73,156],"schemes":[74,84,194],"fail":[75],"capture":[77,143],"unique":[79],"characteristics":[80,145],"disassembly.":[82],"These":[83,149],"ignore":[85],"complex":[87],"intra-instruction":[88],"structures":[89],"and":[90,103,169,172,188],"mainly":[91],"rely":[92],"on":[93,132],"control":[94],"flow":[95],"which":[97],"contextual":[99],"information":[100],"noisy":[102],"can":[104,159],"influenced":[106],"by":[107,128],"compiler":[108],"optimizations.":[109],"In":[110],"this":[111],"paper,":[112],"propose":[114],"pre-train":[116],"assembly":[118,147],"language":[119],"model":[120],"called":[121],"PalmTree":[122,137,174,180],"for":[123,185,195],"generating":[124],"general-purpose":[125],"embeddings":[127],"conducting":[129],"self-supervised":[130],"training":[131,150],"large-scale":[133],"unlabeled":[134],"corpora.":[136],"utilizes":[138],"three":[139],"pre-training":[140],"tasks":[141,151],"various":[144],"language.":[148],"overcome":[152],"problems":[154],"schemes,":[157],"thus":[158],"help":[160],"generate":[162],"high-quality":[163],"representations.":[164],"We":[165],"conduct":[166],"both":[167],"intrinsic":[168,186],"extrinsic":[170],"evaluations,":[171],"compare":[173],"with":[175],"other":[176,191],"embedding":[178,193],"schemes.":[179],"best":[183],"performance":[184],"metrics,":[187],"outperforms":[189],"all":[196],"downstream":[197],"tasks.":[198]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":57},{"year":2024,"cited_by_count":41},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-22T08:38:42.863108","created_date":"2021-03-15T00:00:00"}
