{"id":"https://openalex.org/W7133323988","doi":"https://doi.org/10.48550/arxiv.2603.01376","title":"3BASiL: An Algorithmic Framework for Sparse plus Low-Rank Compression of LLMs","display_name":"3BASiL: An Algorithmic Framework for Sparse plus Low-Rank Compression of LLMs","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133323988","doi":"https://doi.org/10.48550/arxiv.2603.01376"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01376","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01376","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01376","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116142717","display_name":"Mehdi Makni","orcid":"https://orcid.org/0009-0009-7573-8775"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Makni, Mehdi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127998304","display_name":"Xiang Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Xiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5001765231","display_name":"Rahul Mazumder","orcid":"https://orcid.org/0000-0003-4285-7400"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mazumder, Rahul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.12780000269412994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.12780000269412994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.11379999667406082,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.10369999706745148,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.7285000085830688},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5141000151634216},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.4918999969959259},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.4878000020980835},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.4498000144958496},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4375999867916107}],"concepts":[{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.7285000085830688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6559000015258789},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5630000233650208},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5141000151634216},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.4918999969959259},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.4878000020980835},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.4498000144958496},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4375999867916107},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.40560001134872437},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.37529999017715454},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.366100013256073},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.34769999980926514},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3158000111579895},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.25870001316070557},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.257099986076355}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01376","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01376","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01376","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01376","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse":[0,176],"plus":[1],"Low-Rank":[2],"$(\\mathbf{S}":[3,67,147,201],"+":[4,38,68,148,177,202],"\\mathbf{LR})$":[5,69,149,203],"decomposition":[6,70],"of":[7,30,71,87],"Large":[8],"Language":[9],"Models":[10],"(LLMs)":[11],"has":[12],"emerged":[13],"as":[14,142],"a":[15,28,81,125,174],"promising":[16],"direction":[17],"in":[18],"model":[19,25,169],"compression,":[20],"aiming":[21],"to":[22,52,93,166,182,199],"decompose":[23],"pre-trained":[24],"weights":[26],"into":[27],"sum":[29],"sparse":[31,115],"and":[32,116],"low-rank":[33,117],"matrices":[34],"$(\\mathbf{W}":[35],"\\approx":[36],"\\mathbf{S}":[37],"\\mathbf{LR})$.":[39],"Despite":[40],"recent":[41],"progress,":[42],"existing":[43],"methods":[44],"often":[45],"suffer":[46],"from":[47],"substantial":[48],"performance":[49],"degradation":[50],"compared":[51,181,198],"dense":[53,167],"models.":[54],"In":[55],"this":[56,75],"work,":[57],"we":[58],"introduce":[59],"3BASiL-TM,":[60],"an":[61,105,195],"efficient":[62,106],"one-shot":[63],"post-training":[64],"method":[65,187],"for":[66],"LLMs":[72],"that":[73,111,129,158],"addresses":[74],"gap.":[76],"Our":[77,154,205],"approach":[78],"first":[79],"introduces":[80],"novel":[82,126],"3-Block":[83],"Alternating":[84],"Direction":[85],"Method":[86],"Multipliers":[88],"(ADMM)":[89],"method,":[90],"termed":[91],"3BASiL,":[92],"minimize":[94],"the":[95,114,133,137,161],"layer-wise":[96],"reconstruction":[97],"error":[98],"with":[99],"convergence":[100],"guarantees.":[101],"We":[102],"then":[103],"design":[104],"transformer-matching":[107],"(TM)":[108],"refinement":[109],"step":[110,123],"jointly":[112],"optimizes":[113],"components":[118],"across":[119],"transformer":[120,134],"layers.":[121],"This":[122],"minimizes":[124],"memory-efficient":[127],"loss":[128],"aligns":[130],"outputs":[131],"at":[132,209],"level.":[135],"Notably,":[136],"TM":[138],"procedure":[139],"is":[140,207],"universal":[141],"it":[143],"can":[144],"enhance":[145],"any":[146],"decomposition,":[150],"including":[151],"pure":[152],"sparsity.":[153],"numerical":[155],"experiments":[156],"show":[157],"3BASiL-TM":[159],"reduces":[160],"WikiText2":[162],"perplexity":[163],"gap":[164],"relative":[165],"LLaMA-8B":[168],"by":[170],"over":[171,189],"30%":[172],"under":[173],"(2:4":[175],"64":[178],"LR)":[179],"configuration,":[180],"prior":[183],"methods.":[184],"Moreover,":[185],"our":[186],"achieves":[188],"2.5x":[190],"faster":[191],"compression":[192],"runtime":[193],"on":[194],"A100":[196],"GPU":[197],"SOTA":[200],"method.":[204],"code":[206],"available":[208],"https://github.com/mazumder-lab/3BASiL.":[210]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-04T00:00:00"}
