{"id":"https://openalex.org/W7147548015","doi":"https://doi.org/10.48550/arxiv.2603.29813","title":"Compiling Code LLMs into Lightweight Executables","display_name":"Compiling Code LLMs into Lightweight Executables","publication_year":2026,"publication_date":"2026-03-31","ids":{"openalex":"https://openalex.org/W7147548015","doi":"https://doi.org/10.48550/arxiv.2603.29813"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.29813","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29813","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.29813","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002667771","display_name":"Jieke Shi","orcid":"https://orcid.org/0000-0002-0799-5018"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shi, Jieke","raw_affiliation_strings":["James"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"James","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103075577","display_name":"Junda He","orcid":"https://orcid.org/0000-0003-3370-8585"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Junda","raw_affiliation_strings":["James"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"James","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132623993","display_name":"Zhou Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zhou","raw_affiliation_strings":["James"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"James","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132581939","display_name":"Chengran Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Chengran","raw_affiliation_strings":["James"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"James","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046079954","display_name":"Mykhailo V. Klymenko","orcid":"https://orcid.org/0000-0002-4641-8977"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Klymenko, Mykhailo","raw_affiliation_strings":["James"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"James","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101819360","display_name":"Thong Hoang","orcid":"https://orcid.org/0000-0001-5096-4834"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hoang, Thong","raw_affiliation_strings":["James"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"James","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132569811","display_name":"Xiwei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xiwei","raw_affiliation_strings":["Sherry"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sherry","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132687642","display_name":"Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xing, Zhenchang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5028641941","display_name":"Zhenchang Xing","orcid":"https://orcid.org/0000-0001-7663-1421"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lo, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5002667771"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.17520000040531158,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.17520000040531158,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.16110000014305115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10320000350475311,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.8260999917984009},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5376999974250793},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.49729999899864197},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.49639999866485596},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.462799996137619},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4228000044822693},{"id":"https://openalex.org/keywords/inference-engine","display_name":"Inference engine","score":0.397599995136261},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.38830000162124634}],"concepts":[{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.8260999917984009},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8217999935150146},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5376999974250793},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5171999931335449},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.49729999899864197},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.49639999866485596},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.462799996137619},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4228000044822693},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.397599995136261},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.38830000162124634},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3398999869823456},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.33889999985694885},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.314300000667572},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2969000041484833},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C2778361913","wikidata":"https://www.wikidata.org/wiki/Q7248437","display_name":"Program transformation","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.28049999475479126},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C198370458","wikidata":"https://www.wikidata.org/wiki/Q586459","display_name":"Type inference","level":3,"score":0.27469998598098755},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2669000029563904},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2554999887943268},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.25279998779296875},{"id":"https://openalex.org/C154690210","wikidata":"https://www.wikidata.org/wiki/Q1668499","display_name":"Rewriting","level":2,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.29813","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29813","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.29813","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29813","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.45143505930900574,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,16,132,161,199],"demand":[1],"for":[2,30,195],"better":[3],"prediction":[4],"accuracy":[5,253],"and":[6,18,47,70,88,94,120,152,174,228,240],"higher":[7],"execution":[8],"performance":[9],"in":[10,266],"neural":[11],"networks":[12],"continues":[13],"to":[14,101,232,255],"grow.":[15],"emergence":[17],"success":[19],"of":[20,117,201,263],"Large":[21],"Language":[22],"Models":[23],"(LLMs)":[24],"have":[25],"produced":[26],"many":[27],"cloud-based":[28],"tools":[29],"software":[31],"engineering":[32],"tasks":[33],"such":[34,58,85],"as":[35,59,86,156],"code":[36],"suggestion.":[37],"Although":[38],"effective,":[39],"cloud":[40],"deployment":[41,76],"raises":[42],"concerns":[43],"over":[44],"privacy,":[45],"latency,":[46],"reliance":[48],"on":[49,55,213,219],"network":[50],"connectivity.":[51],"Running":[52],"LLMs":[53,119],"locally":[54],"personal":[56],"devices":[57,81],"laptops":[60],"would":[61],"address":[62],"these":[63],"issues,":[64],"because":[65],"it":[66,99],"enables":[67],"offline":[68],"use":[69],"reduces":[71],"response":[72],"time.":[73],"However,":[74],"local":[75],"is":[77,134,164,203],"challenging,":[78],"since":[79],"commodity":[80,214],"lack":[82],"high-performance":[83],"accelerators":[84],"GPUs":[87],"are":[89,192],"constrained":[90],"by":[91,139],"limited":[92],"memory":[93,238],"compute":[95],"capacity,":[96],"which":[97,142],"makes":[98],"hard":[100],"execute":[102,125],"large":[103],"models":[104],"efficiently.":[105],"We":[106,216],"present":[107],"Ditto,":[108],"a":[109,135,157,165,204],"framework":[110],"that":[111,124,171,191,207],"optimizes":[112],"both":[113],"the":[114,121,196,209,256],"model":[115,144],"size":[116],"Code":[118,211,222,225],"inference":[122,249],"programs":[123],"them.":[126],"Our":[127],"approach":[128],"integrates":[129],"two":[130],"components.":[131],"first":[133],"quantization":[136],"technique":[137],"inspired":[138],"product":[140],"quantization,":[141],"groups":[143],"parameters":[145],"into":[146,169,184],"per-block":[147],"codebooks":[148],"via":[149],"K-Means":[150],"clustering":[151],"stores":[153],"each":[154],"weight":[155],"bit-packed":[158],"low-bitwidth":[159],"index.":[160],"second":[162],"component":[163],"compilation":[166],"pass":[167],"integrated":[168],"LLVM":[170],"automatically":[172],"detects":[173],"replaces":[175],"unoptimized":[176],"General":[177],"Matrix-Vector":[178],"Multiplication":[179],"(GEMV)":[180],"operations,":[181],"with":[182,246,259],"calls":[183],"Basic":[185],"Linear":[186],"Algebra":[187],"Subprograms":[188],"(BLAS)":[189],"libraries":[190],"highly":[193],"optimized":[194],"target":[197],"hardware.":[198,215],"output":[200],"Ditto":[202,218],"compiled":[205],"executable":[206],"runs":[208],"selected":[210],"LLM":[212],"evaluate":[217],"three":[220],"popular":[221],"LLMs,":[223],"namely":[224],"Llama,":[226],"MagicCoder,":[227],"OpenCodeInterpreter,":[229],"achieving":[230],"up":[231],"10.5$\\times$":[233,241],"faster":[234],"inference,":[235],"6.4$\\times$":[236],"lower":[237,242],"usage,":[239],"energy":[243],"consumption":[244],"compared":[245],"their":[247],"original":[248],"pipelines,":[250],"while":[251],"preserving":[252],"close":[254],"full-precision":[257],"models,":[258],"an":[260],"average":[261],"loss":[262],"only":[264],"0.27%":[265],"pass@1.":[267]},"counts_by_year":[],"updated_date":"2026-04-28T06:04:28.489925","created_date":"2026-04-02T00:00:00"}
