{"id":"https://openalex.org/W4415970852","doi":"https://doi.org/10.1109/tmc.2025.3629756","title":"Lightweight and Post-Training Structured Pruning for On-Device Large Language Models","display_name":"Lightweight and Post-Training Structured Pruning for On-Device Large Language Models","publication_year":2025,"publication_date":"2025-11-06","ids":{"openalex":"https://openalex.org/W4415970852","doi":"https://doi.org/10.1109/tmc.2025.3629756"},"language":null,"primary_location":{"id":"doi:10.1109/tmc.2025.3629756","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmc.2025.3629756","pdf_url":null,"source":{"id":"https://openalex.org/S69141925","display_name":"IEEE Transactions on Mobile Computing","issn_l":"1536-1233","issn":["1536-1233","1558-0660","2161-9875"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Mobile Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zihuai Xu","orcid":"https://orcid.org/0009-0009-2192-4580"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zihuai Xu","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0009-0009-2192-4580","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018803737","display_name":"Yang Xu","orcid":"https://orcid.org/0000-0003-0839-3892"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Xu","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0000-0003-0839-3892","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063184427","display_name":"Hongli Xu","orcid":"https://orcid.org/0000-0003-3831-4577"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongli Xu","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0000-0003-3831-4577","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062964635","display_name":"Yunming Liao","orcid":"https://orcid.org/0000-0002-5065-2600"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunming Liao","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0000-0002-5065-2600","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032224075","display_name":"Zhiwei Yao","orcid":"https://orcid.org/0009-0007-2284-3323"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwei Yao","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0009-0007-2284-3323","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":null,"display_name":"Zuan Xie","orcid":"https://orcid.org/0009-0003-0150-3152"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zuan Xie","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0009-0003-0150-3152","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83902908,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"25","issue":"4","first_page":"5377","last_page":"5392"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.44859999418258667,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.44859999418258667,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.11209999769926071,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.05490000173449516,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.8999999761581421},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6154000163078308},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.5205000042915344},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4952000081539154},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.39410001039505005},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.37229999899864197},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.35839998722076416}],"concepts":[{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.8999999761581421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8708999752998352},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6154000163078308},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.5205000042915344},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4952000081539154},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44999998807907104},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41260001063346863},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.39410001039505005},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.37229999899864197},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.35839998722076416},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29490000009536743},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.2646999955177307}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmc.2025.3629756","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmc.2025.3629756","pdf_url":null,"source":{"id":"https://openalex.org/S69141925","display_name":"IEEE Transactions on Mobile Computing","issn_l":"1536-1233","issn":["1536-1233","1558-0660","2161-9875"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Mobile Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2668306262","display_name":null,"funder_award_id":"WK2150250044","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G5563275549","display_name":null,"funder_award_id":"624B2136","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G645416012","display_name":null,"funder_award_id":"62132019","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7992979919","display_name":null,"funder_award_id":"62472401","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1632114991","https://openalex.org/W1989314419","https://openalex.org/W2027461913","https://openalex.org/W2480854438","https://openalex.org/W2507380695","https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2963123047","https://openalex.org/W2998617917","https://openalex.org/W3034457116","https://openalex.org/W3037032032","https://openalex.org/W3116594510","https://openalex.org/W3130554079","https://openalex.org/W3194676777","https://openalex.org/W4312258136","https://openalex.org/W4386083031","https://openalex.org/W4389120417","https://openalex.org/W4393147854","https://openalex.org/W4394862768","https://openalex.org/W4400446377","https://openalex.org/W4402670757","https://openalex.org/W4402713323","https://openalex.org/W4409364145","https://openalex.org/W4412887764","https://openalex.org/W4415689913","https://openalex.org/W7133230376"],"related_works":[],"abstract_inverted_index":{"Considering":[0],"the":[1,14,35,86,92,104,141,153,176,179],"hardware-friendly":[2],"characteristics,":[3],"structured":[4,66,129],"pruning":[5,26,67,131,162,171,217],"has":[6],"emerged":[7],"as":[8],"an":[9],"effective":[10],"solution":[11],"to":[12,42,109,167,188,243],"reduce":[13,34],"resource":[15],"requirements":[16],"of":[17,30,96,106,118,143,178,219,228],"large":[18],"language":[19],"models":[20,110,212],"(LLMs)":[21],"on":[22,49,197],"resource-constrained":[23],"devices.":[24],"Since":[25],"a":[27,124],"certain":[28],"number":[29],"parameters":[31],"will":[32],"often":[33],"model":[36,144,190,234],"accuracy,":[37],"fine-tuning":[38,47],"is":[39],"usually":[40],"required":[41],"recover":[43],"performance":[44,239],"loss.":[45],"However,":[46],"relies":[48],"high":[50,93],"device":[51,202],"power":[52],"and":[53,83,103,126,146,152,163,201],"substantial":[54],"data,":[55],"making":[56],"it":[57],"unsuitable":[58],"for":[59,134],"on-device":[60,135],"applications.":[61,119,136],"Recent":[62],"approaches":[63],"propose":[64],"post-training":[65,128],"techniques":[68],"that":[69,207],"do":[70],"not":[71,173],"require":[72],"fine-tuning,":[73,194],"following":[74],"three":[75],"granularities,":[76],"<italic":[77],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[78],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">i.e.</i>,":[79],"neurons,":[80],"attention":[81],"heads,":[82],"layers.":[84],"Unfortunately,":[85],"previous":[87],"solutions":[88],"still":[89],"suffer":[90],"from":[91],"memory":[94,247],"overhead":[95,248],"substructures'":[97],"importance":[98,142],"evaluation":[99],"at":[100],"different":[101],"granularities":[102],"problem":[105],"only":[107],"applying":[108],"with":[111,213],"specific":[112],"structures,":[113],"which":[114],"limit":[115],"their":[116],"scope":[117],"This":[120],"paper":[121],"proposes":[122],"COMP,":[123],"lightweight":[125],"general":[127],"hybrid-granularity":[130],"method":[132],"designed":[133],"COMP":[137,158,184,208,222,237],"begins":[138],"by":[139,174,240,249],"assessing":[140],"layers":[145],"neurons":[147],"individually":[148],"using":[149],"distributional":[150],"distance":[151],"matrix":[154],"condition":[155],"number.":[156],"Subsequently,":[157],"performs":[159,209],"fine-grained":[160],"neuron":[161],"simultaneously":[164],"determines":[165],"whether":[166],"apply":[168],"coarse-grained":[169],"layer":[170],"or":[172],"comparing":[175],"outcomes":[177],"two":[180],"granularity":[181],"strategies.":[182],"Furthermore,":[183],"implements":[185],"mask":[186],"tuning":[187],"restore":[189],"accuracy":[191],"without":[192],"additional":[193],"minimizing":[195],"dependency":[196],"external":[198],"data":[199],"resources":[200],"power.":[203],"Experimental":[204],"results":[205],"demonstrate":[206],"well":[210],"across":[211],"various":[214],"architectures.":[215],"When":[216],"20%":[218],"LLaMA-2-13B":[220],"parameters,":[221],"requires":[223],"less":[224],"than":[225,232],"8":[226],"GB":[227],"memory,":[229],"maintaining":[230],"more":[231],"90%":[233],"accuracy.":[235],"Meanwhile,":[236],"improves":[238],"3.54%":[241],"compared":[242],"LLM-Pruner,":[244],"while":[245],"reducing":[246],"90%.":[250]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-09T07:00:12.390032","created_date":"2025-11-06T00:00:00"}
