{"id":"https://openalex.org/W7160539732","doi":"https://doi.org/10.48550/arxiv.2605.04341","title":"Budgeted LoRA: Distillation as Structured Compute Allocation for Efficient Inference","display_name":"Budgeted LoRA: Distillation as Structured Compute Allocation for Efficient Inference","publication_year":2026,"publication_date":"2026-05-05","ids":{"openalex":"https://openalex.org/W7160539732","doi":"https://doi.org/10.48550/arxiv.2605.04341"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.04341","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04341","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.04341","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135597106","display_name":"Mohammed Sabry","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sabry, Mohammed","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135590783","display_name":"Anya Belz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Belz, Anya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5135597106"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.10750000178813934,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.10750000178813934,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.09790000319480896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.09539999812841415,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.9391999840736389},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7275000214576721},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6726999878883362},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.604200005531311},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5813000202178955},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.45260000228881836},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.4207000136375427}],"concepts":[{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.9391999840736389},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7275000214576721},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6873999834060669},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6726999878883362},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.604200005531311},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5813000202178955},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.45260000228881836},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C8505890","wikidata":"https://www.wikidata.org/wiki/Q605095","display_name":"Budget constraint","level":2,"score":0.4174000024795532},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3659999966621399},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.36039999127388},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3517000079154968},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.32829999923706055},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26570001244544983},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.04341","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04341","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.04341","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04341","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,57],"study":[1],"distillation":[2,62],"for":[3],"large":[4],"language":[5],"models":[6,17],"under":[7,187],"explicit":[8],"compute":[9,71,85],"constraints,":[10],"with":[11,155,169],"the":[12,45,89,100],"goal":[13],"of":[14,75,93,136],"producing":[15],"student":[16,79],"that":[18,64,87,123],"are":[19],"not":[20],"only":[21],"cheaper":[22],"to":[23,34,52,211],"train,":[24],"but":[25],"structurally":[26],"efficient":[27],"at":[28,151,160],"inference":[29,55],"time.":[30],"While":[31],"prior":[32],"approaches":[33],"parameter-efficient":[35],"distillation,":[36,189],"such":[37],"as":[38,68],"LoRA,":[39,60],"reduce":[40],"adaptation":[41],"cost,":[42],"they":[43],"leave":[44],"dense":[46,94,105,112,129,207],"backbone":[47],"unchanged":[48],"and":[49,106,119,173],"therefore":[50],"fail":[51],"deliver":[53],"meaningful":[54],"savings.":[56],"propose":[58],"Budgeted":[59,145],"a":[61,69,77,83,134,140,152,156,166],"framework":[63],"treats":[65],"model":[66,101],"compression":[67,122],"structured":[70],"allocation":[72],"problem.":[73],"Instead":[74],"using":[76],"fixed":[78],"architecture,":[80],"we":[81],"introduce":[82],"global":[84],"budget":[86,142,154,163],"sets":[88],"final":[90],"target":[91],"fraction":[92],"computation":[95,208],"retained.":[96],"Under":[97],"this":[98],"constraint,":[99],"redistributes":[102],"capacity":[103],"across":[104],"low-rank":[107,117,212],"pathways":[108],"via":[109],"(i)":[110],"module-level":[111],"retention":[113],"coefficients,":[114],"(ii)":[115],"adaptive":[116],"allocation,":[118],"(iii)":[120],"post-training":[121],"selectively":[124],"removes,":[125],"approximates,":[126],"or":[127,197],"preserves":[128,175],"components.":[130],"This":[131],"formulation":[132],"yields":[133],"family":[135],"students":[137],"controlled":[138],"by":[139],"single":[141],"dial.":[143],"Empirically,":[144],"LoRA":[146,149],"matches":[147],"standard":[148],"perplexity":[150,171,196],"moderate":[153,170],"1.74x":[157],"compressed-module":[158],"speedup;":[159],"an":[161],"aggressive":[162],"it":[164,174,202],"achieves":[165],"4.05x":[167],"speedup":[168],"degradation,":[172],"higher":[176],"accuracy":[177],"on":[178],"function-style":[179],"in-context":[180],"learning":[181],"probes.":[182],"These":[183],"results":[184],"suggest":[185],"that,":[186],"compute-constrained":[188],"retaining":[190],"behavior":[191],"is":[192,203,209],"less":[193],"about":[194,204],"matching":[195],"removing":[198],"more":[199],"parameters":[200],"than":[201],"controlling":[205],"how":[206],"transferred":[210],"pathways.":[213]},"counts_by_year":[],"updated_date":"2026-05-08T13:18:25.657630","created_date":"2026-05-08T00:00:00"}
