{"id":"https://openalex.org/W4415546053","doi":"https://doi.org/10.1145/3746262.3761975","title":"Efficient and Accurate Post-Training Sparsification of Large Language Models with Proximal Operators","display_name":"Efficient and Accurate Post-Training Sparsification of Large Language Models with Proximal Operators","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415546053","doi":"https://doi.org/10.1145/3746262.3761975"},"language":null,"primary_location":{"id":"doi:10.1145/3746262.3761975","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746262.3761975","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Workshop on Rich Media With Generative AI","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746262.3761975","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073885088","display_name":"Pu Zhao","orcid":"https://orcid.org/0000-0001-5018-2859"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pu Zhao","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0001-5018-2859","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014695514","display_name":"Dani Gunawan","orcid":"https://orcid.org/0000-0002-7023-1176"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dani Gunawan","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0002-7023-1176","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101655527","display_name":"Xuan Shen","orcid":"https://orcid.org/0000-0003-4965-7321"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xuan Shen","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0003-4965-7321","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037655503","display_name":"Zheng Zhan","orcid":"https://orcid.org/0000-0002-3882-5484"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zheng Zhan","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0002-3882-5484","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061509048","display_name":"Xuehang Guo","orcid":"https://orcid.org/0009-0009-1143-9063"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xuehang Guo","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Champaign, USA"],"raw_orcid":"https://orcid.org/0009-0009-1143-9063","affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Champaign, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100362104","display_name":"Jun Liu","orcid":"https://orcid.org/0000-0003-3808-4599"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Liu","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0003-3808-4599","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078971265","display_name":"Zhenglun Kong","orcid":"https://orcid.org/0000-0002-8120-4456"},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhenglun Kong","raw_affiliation_strings":["Harvard University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0002-8120-4456","affiliations":[{"raw_affiliation_string":"Harvard University, Boston, USA","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100651384","display_name":"Yanzhi Wang","orcid":"https://orcid.org/0000-0002-3024-7990"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanzhi Wang","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0002-3024-7990","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Gaowen Liu","orcid":"https://orcid.org/0009-0000-9194-1233"},"institutions":[{"id":"https://openalex.org/I2801562743","display_name":"Cisco College","ror":"https://ror.org/03gc7jk79","country_code":"US","type":"education","lineage":["https://openalex.org/I2801562743"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gaowen Liu","raw_affiliation_strings":["Cisco Research, San Francisco, USA"],"raw_orcid":"https://orcid.org/0009-0000-9194-1233","affiliations":[{"raw_affiliation_string":"Cisco Research, San Francisco, USA","institution_ids":["https://openalex.org/I2801562743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043582832","display_name":"Xue Lin","orcid":"https://orcid.org/0000-0001-6210-8883"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xue Lin","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0001-6210-8883","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14538619,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"11","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.8529999852180481},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6801999807357788},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6776000261306763},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.42100000381469727},{"id":"https://openalex.org/keywords/retraining","display_name":"Retraining","score":0.39570000767707825},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.35260000824928284},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.3508000075817108},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3018999993801117}],"concepts":[{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.8529999852180481},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7544999718666077},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6801999807357788},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6776000261306763},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4708000123500824},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.430400013923645},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.42149999737739563},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.42100000381469727},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.39570000767707825},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.35260000824928284},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.3508000075817108},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30469998717308044},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.302700012922287},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3018999993801117},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.2784999907016754},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C3018263672","wikidata":"https://www.wikidata.org/wiki/Q1296251","display_name":"Efficient algorithm","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C2777480716","wikidata":"https://www.wikidata.org/wiki/Q23582796","display_name":"Resource consumption","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746262.3761975","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746262.3761975","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Workshop on Rich Media With Generative AI","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3746262.3761975","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746262.3761975","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Workshop on Rich Media With Generative AI","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2156150815","https://openalex.org/W2276892413","https://openalex.org/W2900971432","https://openalex.org/W2909079677","https://openalex.org/W2946225357","https://openalex.org/W2963091133","https://openalex.org/W2963685823","https://openalex.org/W2979826702","https://openalex.org/W2981892732","https://openalex.org/W2998161546","https://openalex.org/W3092564588","https://openalex.org/W3163966209","https://openalex.org/W3178531146","https://openalex.org/W3184606595","https://openalex.org/W3184682079","https://openalex.org/W3202459663","https://openalex.org/W3212723535","https://openalex.org/W4244393449","https://openalex.org/W4288337628","https://openalex.org/W4292363360","https://openalex.org/W4295745232","https://openalex.org/W4312121119","https://openalex.org/W4312446478","https://openalex.org/W4382450529","https://openalex.org/W4386075865","https://openalex.org/W4386765321","https://openalex.org/W4388667495","https://openalex.org/W4389576338","https://openalex.org/W4404133695","https://openalex.org/W4404783218","https://openalex.org/W4409363545","https://openalex.org/W4413147647"],"related_works":[],"abstract_inverted_index":{"Deploying":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"faces":[5],"great":[6],"challenges":[7],"because":[8],"of":[9,79,141],"their":[10,26],"massive":[11],"parameters":[12],"and":[13,29,70,91,133,143],"computations.":[14],"Traditional":[15],"pruning":[16,104],"methods":[17],"can":[18,100],"hardly":[19],"be":[20],"applied":[21],"for":[22,32],"LLMs":[23],"due":[24],"to":[25,82,112],"high":[27],"GPU":[28],"data":[30],"consumption":[31],"fine-tuning":[33],"or":[34,129],"retraining":[35],"on":[36,96,127],"the":[37,84,102],"full":[38],"dataset.":[39],"In":[40,63],"response,":[41],"post-training":[42,72],"techniques":[43],"with":[44,59,114],"reduced":[45],"resource":[46],"requirements":[47],"have":[48],"gained":[49],"increasing":[50],"popularity,":[51],"as":[52],"it":[53],"typically":[54],"does":[55],"not":[56],"require":[57],"retraining,":[58],"certain":[60],"performance":[61,138],"degradation.":[62],"this":[64],"paper,":[65],"we":[66],"propose":[67],"an":[68],"efficient":[69],"accurate":[71],"sparsification":[73],"method":[74],"using":[75],"Alternating":[76],"Direction":[77],"Method":[78],"Multipliers":[80],"(ADMM),":[81],"minimize":[83],"accuracy":[85,144],"loss":[86],"through":[87],"searching":[88],"pruned":[89],"weights":[90],"modifying":[92],"remaining":[93],"weights.":[94],"Based":[95],"proximal":[97],"operators,":[98],"ADMM":[99],"split":[101],"original":[103],"problem":[105],"into":[106],"multiple":[107],"sub-problems,":[108],"which":[109],"are":[110],"easier":[111],"solve":[113],"our":[115,136],"derived":[116],"optimal":[117],"solutions.":[118],"Our":[119],"comprehensive":[120],"experiments":[121],"across":[122],"various":[123],"LLM":[124],"families":[125],"(based":[126],"transformers":[128],"state":[130],"space":[131],"models)":[132],"datasets":[134],"demonstrate":[135],"superior":[137],"in":[139],"terms":[140],"perplexity":[142],"over":[145],"SOTA":[146],"baselines.":[147]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-25T00:00:00"}
