{"id":"https://openalex.org/W4392904185","doi":"https://doi.org/10.1109/icassp48485.2024.10445737","title":"One-Shot Sensitivity-Aware Mixed Sparsity Pruning for Large Language Models","display_name":"One-Shot Sensitivity-Aware Mixed Sparsity Pruning for Large Language Models","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904185","doi":"https://doi.org/10.1109/icassp48485.2024.10445737"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10445737","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10445737","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080901723","display_name":"Hang Shao","orcid":"https://orcid.org/0000-0002-1322-4789"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hang Shao","raw_affiliation_strings":["AI Institute,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence","Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence, AI Institute","Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"AI Institute,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence","institution_ids":["https://openalex.org/I4210099069"]},{"raw_affiliation_string":"Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence, AI Institute","institution_ids":[]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114910830","display_name":"Bei Liu","orcid":"https://orcid.org/0000-0002-6208-003X"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bei Liu","raw_affiliation_strings":["AI Institute,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence","Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence, AI Institute"],"affiliations":[{"raw_affiliation_string":"AI Institute,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence","institution_ids":["https://openalex.org/I4210099069"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence, AI Institute","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["AI Institute,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence","Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence, AI Institute","Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"AI Institute,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence","institution_ids":["https://openalex.org/I4210099069"]},{"raw_affiliation_string":"Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence, AI Institute","institution_ids":[]},{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5080901723"],"corresponding_institution_ids":["https://openalex.org/I183067930","https://openalex.org/I4210099069"],"apc_list":null,"apc_paid":null,"fwci":7.3586,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.97495321,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"11296","last_page":"11300"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6905328035354614},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5820184946060181},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5332645177841187},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5321836471557617},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5191075801849365},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5123382806777954},{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.49589523673057556},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.48285871744155884},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.43257737159729004},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41331109404563904},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.28645122051239014},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13238954544067383},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11518701910972595},{"id":"https://openalex.org/keywords/electronic-engineering","display_name":"Electronic engineering","score":0.08336597681045532}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6905328035354614},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5820184946060181},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5332645177841187},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5321836471557617},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5191075801849365},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5123382806777954},{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.49589523673057556},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.48285871744155884},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.43257737159729004},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41331109404563904},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28645122051239014},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13238954544067383},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11518701910972595},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.08336597681045532},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10445737","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10445737","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1899722019","display_name":null,"funder_award_id":"62071288","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3537961753","display_name":null,"funder_award_id":"62122050","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5465080215","display_name":null,"funder_award_id":"2021SHZDZX0102","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"},{"id":"https://openalex.org/G7883464895","display_name":null,"funder_award_id":"SHZDZX0102","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"},{"id":"https://openalex.org/G7955319111","display_name":null,"funder_award_id":"021SHZDZX0102","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"},{"id":"https://openalex.org/G8551726186","display_name":null,"funder_award_id":"2021SHZDZX","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"},{"id":"https://openalex.org/G8721642152","display_name":null,"funder_award_id":"2021SHZDZX0102","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1674493795","https://openalex.org/W1999352252","https://openalex.org/W2057450058","https://openalex.org/W2093647425","https://openalex.org/W2125389748","https://openalex.org/W2764043458","https://openalex.org/W2946609015","https://openalex.org/W2963091133","https://openalex.org/W2970454332","https://openalex.org/W2978017171","https://openalex.org/W2979691890","https://openalex.org/W2982041622","https://openalex.org/W3015233032","https://openalex.org/W3101066076","https://openalex.org/W3127829048","https://openalex.org/W3173256823","https://openalex.org/W3173374050","https://openalex.org/W3201174429","https://openalex.org/W3211787299","https://openalex.org/W4206634569","https://openalex.org/W4226075153","https://openalex.org/W4281651027","https://openalex.org/W4287327026","https://openalex.org/W4288026258","https://openalex.org/W4288089799","https://openalex.org/W4293166090","https://openalex.org/W4298422451","https://openalex.org/W4307934016","https://openalex.org/W4309591680","https://openalex.org/W4313484599","https://openalex.org/W4322718191","https://openalex.org/W4377371819","https://openalex.org/W4379548477","https://openalex.org/W4384918448","https://openalex.org/W4385573119","https://openalex.org/W6677103964","https://openalex.org/W6678583879","https://openalex.org/W6727099177","https://openalex.org/W6745148473","https://openalex.org/W6768851824","https://openalex.org/W6769627184","https://openalex.org/W6770425567","https://openalex.org/W6776129198","https://openalex.org/W6781376665","https://openalex.org/W6790943123","https://openalex.org/W6797854001","https://openalex.org/W6803815625","https://openalex.org/W6838633097","https://openalex.org/W6842795023","https://openalex.org/W6846164622","https://openalex.org/W6847478871","https://openalex.org/W6848451824","https://openalex.org/W6850625674","https://openalex.org/W6852927819","https://openalex.org/W6852962002","https://openalex.org/W6854866820"],"related_works":["https://openalex.org/W4288365749","https://openalex.org/W2936497627","https://openalex.org/W3013624417","https://openalex.org/W4287826556","https://openalex.org/W3098382480","https://openalex.org/W4287598411","https://openalex.org/W3100913109","https://openalex.org/W3198458223","https://openalex.org/W3126642501","https://openalex.org/W2964413124"],"abstract_inverted_index":{"Various":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"from":[5],"the":[6,25,44,85,105,112,119],"Generative":[7],"Pretrained":[8],"Transformer":[9],"(GPT)":[10],"family":[11],"have":[12,29],"achieved":[13],"outstanding":[14],"performances":[15],"in":[16,34,59],"a":[17,56,67],"wide":[18],"range":[19],"of":[20,46,87,111,134],"text":[21],"generation":[22],"tasks.":[23],"However,":[24],"enormous":[26],"model":[27],"sizes":[28],"hindered":[30],"their":[31],"practical":[32],"use":[33],"real-world":[35],"applications":[36],"due":[37],"to":[38,76,79,99],"high":[39],"inference":[40],"latency.":[41],"Therefore,":[42],"improving":[43],"efficiencies":[45],"LLMs":[47,78],"through":[48],"quantization,":[49,130],"pruning,":[50],"and":[51],"other":[52],"means":[53],"has":[54],"been":[55],"key":[57],"issue":[58],"LLM":[60],"studies.":[61],"In":[62],"this":[63],"work,":[64],"we":[65],"propose":[66],"method":[68,114,126],"based":[69,94],"on":[70,95],"Hessian":[71],"sensitivity-aware":[72],"mixed":[73],"sparsity":[74,83,92,107,120],"pruning":[75],"prune":[77],"at":[80],"least":[81],"50%":[82],"without":[84],"need":[86],"any":[88],"retraining.":[89],"It":[90],"allocates":[91],"adaptively":[93],"sensitivity,":[96],"allowing":[97],"us":[98],"reduce":[100],"pruning-induced":[101],"error":[102],"while":[103],"maintaining":[104],"overall":[106],"level.":[108],"The":[109],"advantages":[110],"proposed":[113],"exhibit":[115],"even":[116],"more":[117],"when":[118],"is":[121,127],"extremely":[122],"high.":[123],"Furthermore,":[124],"our":[125],"compatible":[128],"with":[129],"enabling":[131],"further":[132],"compression":[133],"LLMs.":[135]},"counts_by_year":[{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":7}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
