{"id":"https://openalex.org/W4415540230","doi":"https://doi.org/10.1145/3746027.3755683","title":"Maximum Redundancy Pruning: A Principle-Driven Layerwise Sparsity Allocation for LLMs","display_name":"Maximum Redundancy Pruning: A Principle-Driven Layerwise Sparsity Allocation for LLMs","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540230","doi":"https://doi.org/10.1145/3746027.3755683"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755683","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755683","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111336307","display_name":"Chang Gao","orcid":"https://orcid.org/0000-0003-4009-426X"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Gao","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4009-426X","affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kang Zhao","orcid":"https://orcid.org/0009-0004-7672-3229"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kang Zhao","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-7672-3229","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088792971","display_name":"Runqi Wang","orcid":"https://orcid.org/0000-0003-0800-3954"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runqi Wang","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0800-3954","affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108103493","display_name":"Jianfei Chen","orcid":"https://orcid.org/0000-0002-9279-6098"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianfei Chen","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9279-6098","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069749738","display_name":"Liping Jing","orcid":"https://orcid.org/0000-0001-7578-3407"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liping Jing","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7578-3407","affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32126678,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"11062","last_page":"11070"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.7434999942779541},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.743399977684021},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6532999873161316},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5698999762535095},{"id":"https://openalex.org/keywords/performance-metric","display_name":"Performance metric","score":0.34630000591278076}],"concepts":[{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.7434999942779541},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.743399977684021},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6532999873161316},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.600600004196167},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5698999762535095},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4456000030040741},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3788999915122986},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3562000095844269},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2651999890804291},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.25519999861717224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755683","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755683","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3587807413","display_name":null,"funder_award_id":"62436001, 62176020","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G534689629","display_name":null,"funder_award_id":"RCS2023K006","funder_id":"https://openalex.org/F4320323067","funder_display_name":"State Key Laboratory of Rail Traffic Control and Safety"},{"id":"https://openalex.org/G7563676223","display_name":null,"funder_award_id":"2024YFE0202900","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323067","display_name":"State Key Laboratory of Rail Traffic Control and Safety","ror":"https://ror.org/01yj56c84"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2156150815","https://openalex.org/W2886851211","https://openalex.org/W2965862774","https://openalex.org/W3194676777","https://openalex.org/W4213080446","https://openalex.org/W4214517123","https://openalex.org/W4312443924"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"have":[4,25],"demonstrated":[5],"impressive":[6],"capabilities,":[7],"but":[8],"their":[9],"enormous":[10],"size":[11],"poses":[12],"significant":[13,79],"challenges":[14],"for":[15,44],"deployment":[16],"in":[17,37,144,163],"real-world":[18],"applications.":[19],"To":[20,148],"address":[21],"this":[22,66,149],"issue,":[23],"researchers":[24],"sought":[26],"to":[27,32,62,110,132],"apply":[28],"network":[29],"pruning":[30,38,96,136,159],"techniques":[31],"LLMs.":[33],"A":[34],"critical":[35],"challenge":[36],"is":[39,89,108],"the":[40,82,93,102,111,125,145,164,171,184,206],"allocation":[41,49],"of":[42,87,95,104,113,128,208],"sparsity":[43,48,127,181],"each":[45,176],"layer.":[46],"Recent":[47],"methods":[50],"are":[51],"often":[52],"based":[53],"on":[54,119,191,199],"heuristics":[55],"or":[56],"search":[57],"that":[58,124,161],"can":[59],"easily":[60],"lead":[61],"suboptimal":[63],"performance.":[64],"In":[65],"paper,":[67],"we":[68,122,151],"conducted":[69,188],"an":[70,157],"extensive":[71,189],"investigation":[72],"into":[73],"various":[74,200],"LLMs":[75,88,129],"and":[76,100,139,197],"revealed":[77],"three":[78,133],"discoveries:":[80],"(1)":[81],"Layerwise":[83],"Pruning":[84,155],"Sensitivity":[85],"(LPS)":[86],"highly":[90],"non-uniform,":[91],"(2)":[92],"choice":[94],"metric":[97,137],"affects":[98],"LPS,":[99],"(3)":[101],"performance":[103],"a":[105],"sparse":[106],"model":[107],"related":[109],"uniformity":[112],"its":[114,211],"layerwise":[115,126,141,180],"redundancy":[116,142],"level.":[117],"Based":[118],"these":[120],"discoveries,":[121],"propose":[123],"should":[130],"adhere":[131],"principles:":[134],"non-uniformity,":[135],"dependency,":[138],"uniform":[140],"level":[143],"pruned":[146],"model.":[147],"end,":[150],"proposed":[152],"Maximum":[153],"Redundancy":[154],"(MRP),":[156],"iterative":[158],"algorithm":[160],"prunes":[162],"most":[165],"redundant":[166],"layers":[167],"(i.e.,":[168],"those":[169],"with":[170,183],"highest":[172],"non-outlier":[173],"ratio)":[174],"at":[175],"iteration.":[177],"The":[178,202],"achieved":[179],"aligns":[182],"outlined":[185],"principles.":[186],"We":[187],"experiments":[190],"publicly":[192],"available":[193],"LLMs,":[194],"including":[195],"LLaMA2":[196],"OPT,":[198],"benchmarks.":[201],"experimental":[203],"results":[204],"validate":[205],"effectiveness":[207],"MRP,":[209],"demonstrating":[210],"superiority":[212],"over":[213],"previous":[214],"methods.":[215]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-25T00:00:00"}
