{"id":"https://openalex.org/W4412818278","doi":"https://doi.org/10.1007/s40747-025-02019-z","title":"A review of state-of-the-art techniques for large language model compression","display_name":"A review of state-of-the-art techniques for large language model compression","publication_year":2025,"publication_date":"2025-08-01","ids":{"openalex":"https://openalex.org/W4412818278","doi":"https://doi.org/10.1007/s40747-025-02019-z"},"language":"en","primary_location":{"id":"doi:10.1007/s40747-025-02019-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-02019-z","pdf_url":null,"source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1007/s40747-025-02019-z","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008778243","display_name":"Pierre V. Dantas","orcid":"https://orcid.org/0000-0001-6390-9340"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Pierre V. Dantas","raw_affiliation_strings":["Department of Computer Science, University of Manchester, Manchester, UK"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057689302","display_name":"Lucas C. Cordeiro","orcid":"https://orcid.org/0000-0002-6235-4272"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lucas C. Cordeiro","raw_affiliation_strings":["Department of Computer Science, University of Manchester, Manchester, UK"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004836948","display_name":"S. S. Waldir","orcid":"https://orcid.org/0000-0003-3095-0042"},"institutions":[{"id":"https://openalex.org/I62885914","display_name":"Universidade Federal do Amazonas","ror":"https://ror.org/02263ky35","country_code":"BR","type":"education","lineage":["https://openalex.org/I62885914"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Waldir S. S. Junior","raw_affiliation_strings":["Department of Electrical Engineering, Federal University of Amazonas (UFAM), Manaus, Brazil"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Federal University of Amazonas (UFAM), Manaus, Brazil","institution_ids":["https://openalex.org/I62885914"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5008778243"],"corresponding_institution_ids":["https://openalex.org/I28407311"],"apc_list":{"value":1320,"currency":"GBP","value_usd":1619},"apc_paid":{"value":1320,"currency":"GBP","value_usd":1619},"fwci":9.6054,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.97692035,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"11","issue":"9","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computational-intelligence","display_name":"Computational intelligence","score":0.5667575001716614},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5054173469543457},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.4966500401496887},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4780026972293854},{"id":"https://openalex.org/keywords/state-of-art","display_name":"State of art","score":0.4300377368927002},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2772424817085266},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.18470290303230286},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14743837714195251},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.14176595211029053}],"concepts":[{"id":"https://openalex.org/C139502532","wikidata":"https://www.wikidata.org/wiki/Q1122090","display_name":"Computational intelligence","level":2,"score":0.5667575001716614},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5054173469543457},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.4966500401496887},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4780026972293854},{"id":"https://openalex.org/C3018574109","wikidata":"https://www.wikidata.org/wiki/Q329338","display_name":"State of art","level":2,"score":0.4300377368927002},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2772424817085266},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.18470290303230286},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14743837714195251},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.14176595211029053},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/s40747-025-02019-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-02019-z","pdf_url":null,"source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/39081e27-cda4-4404-9c39-fff371ed2d60","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/39081e27-cda4-4404-9c39-fff371ed2d60","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Dantas, P V, Cordeiro, L C & Junior, W S S 2025, 'A Review of State-of-the-Art Techniques for Large Language Model Compression', Complex & Intelligent Systems.","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:doaj.org/article:5e39571cb40f4a52876fc86ac5911bcf","is_oa":true,"landing_page_url":"https://doaj.org/article/5e39571cb40f4a52876fc86ac5911bcf","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complex & Intelligent Systems, Vol 11, Iss 9, Pp 1-40 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s40747-025-02019-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-02019-z","pdf_url":null,"source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":103,"referenced_works":["https://openalex.org/W1528917452","https://openalex.org/W1821462560","https://openalex.org/W2099253875","https://openalex.org/W2101807845","https://openalex.org/W2111935653","https://openalex.org/W2116341502","https://openalex.org/W2156150815","https://openalex.org/W2285660444","https://openalex.org/W2289252105","https://openalex.org/W2300242332","https://openalex.org/W2319920447","https://openalex.org/W2604319603","https://openalex.org/W2765872016","https://openalex.org/W2886851211","https://openalex.org/W2963122961","https://openalex.org/W2964259004","https://openalex.org/W2974817986","https://openalex.org/W2979439447","https://openalex.org/W2980294132","https://openalex.org/W2981972129","https://openalex.org/W2982083293","https://openalex.org/W2998218113","https://openalex.org/W3004543888","https://openalex.org/W3012561096","https://openalex.org/W3034368386","https://openalex.org/W3035332806","https://openalex.org/W3040024858","https://openalex.org/W3044091840","https://openalex.org/W3044604993","https://openalex.org/W3089944207","https://openalex.org/W3102927640","https://openalex.org/W3105966348","https://openalex.org/W3108124733","https://openalex.org/W3133450183","https://openalex.org/W3137147200","https://openalex.org/W3138154797","https://openalex.org/W3164045210","https://openalex.org/W3171889065","https://openalex.org/W3173604634","https://openalex.org/W3174469560","https://openalex.org/W3174510164","https://openalex.org/W3177265267","https://openalex.org/W3187174779","https://openalex.org/W3196438592","https://openalex.org/W3197904945","https://openalex.org/W3208642157","https://openalex.org/W4200176867","https://openalex.org/W4206742551","https://openalex.org/W4220973222","https://openalex.org/W4223908421","https://openalex.org/W4253012315","https://openalex.org/W4286742485","https://openalex.org/W4297813615","https://openalex.org/W4309125012","https://openalex.org/W4311991106","https://openalex.org/W4320016074","https://openalex.org/W4375859932","https://openalex.org/W4378509449","https://openalex.org/W4379260375","https://openalex.org/W4379511033","https://openalex.org/W4379743664","https://openalex.org/W4382239191","https://openalex.org/W4382318854","https://openalex.org/W4385175220","https://openalex.org/W4385571011","https://openalex.org/W4386580453","https://openalex.org/W4386692383","https://openalex.org/W4386826409","https://openalex.org/W4389296655","https://openalex.org/W4389500833","https://openalex.org/W4390136547","https://openalex.org/W4391621451","https://openalex.org/W4391926995","https://openalex.org/W4392367398","https://openalex.org/W4393004490","https://openalex.org/W4395065783","https://openalex.org/W4399317863","https://openalex.org/W4399530687","https://openalex.org/W4400002903","https://openalex.org/W4400280901","https://openalex.org/W4400647614","https://openalex.org/W4400720190","https://openalex.org/W4401044216","https://openalex.org/W4401072082","https://openalex.org/W4401226076","https://openalex.org/W4401474635","https://openalex.org/W4401609110","https://openalex.org/W4401682612","https://openalex.org/W4401726555","https://openalex.org/W4402137675","https://openalex.org/W4402742301","https://openalex.org/W4402967736","https://openalex.org/W4403221502","https://openalex.org/W4403941865","https://openalex.org/W4404129974","https://openalex.org/W4404491734","https://openalex.org/W4404918643","https://openalex.org/W4405527989","https://openalex.org/W4406072150","https://openalex.org/W4406266041","https://openalex.org/W4408146288","https://openalex.org/W4408799708","https://openalex.org/W6891689842"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Abstract":[0],"The":[1],"rapid":[2],"advancement":[3],"of":[4,44,178,205],"large":[5],"language":[6,15],"models":[7],"(LLMs)":[8],"has":[9],"driven":[10],"significant":[11],"progress":[12],"in":[13,35],"natural":[14],"processing":[16],"(NLP)":[17],"and":[18,32,53,68,87,101,109,119,141,163,181,191,210],"related":[19,28],"domains.":[20],"However,":[21],"their":[22],"deployment":[23,154],"remains":[24],"constrained":[25],"by":[26],"challenges":[27],"to":[29,61,130,147,174],"computation,":[30],"memory,":[31],"energy":[33,70],"efficiency\u2014particularly":[34],"real-world":[36,192],"applications.":[37],"This":[38,104,156],"work":[39,157],"presents":[40],"a":[41,128,170,203],"comprehensive":[42],"review":[43],"state-of-the-art":[45],"compression":[46,138,189],"techniques,":[47],"including":[48,93,207],"pruning,":[49],"quantization,":[50],"knowledge":[51],"distillation,":[52],"neural":[54],"architecture":[55],"search":[56],"(NAS),":[57],"which":[58],"collectively":[59],"aim":[60],"reduce":[62],"model":[63],"size,":[64],"enhance":[65,136],"inference":[66],"speed,":[67],"lower":[69],"consumption":[71],"while":[72],"maintaining":[73],"performance.":[74],"A":[75],"robust":[76],"evaluation":[77],"framework":[78],"is":[79],"introduced,":[80],"incorporating":[81],"traditional":[82],"metrics,":[83],"such":[84,111],"as":[85,112,127],"accuracy":[86],"perplexity":[88],"(PPL),":[89],"alongside":[90],"advanced":[91],"criteria":[92],"latency-accuracy":[94],"trade-offs,":[95],"parameter":[96],"efficiency,":[97],"multi-objective":[98],"Pareto":[99],"optimization,":[100],"fairness":[102],"considerations.":[103],"study":[105,195],"further":[106],"highlights":[107],"trends":[108],"challenges,":[110],"fairness-aware":[113],"compression,":[114],"robustness":[115],"against":[116],"adversarial":[117],"attacks,":[118],"hardware-specific":[120],"optimizations.":[121],"Additionally,":[122],"NAS-driven":[123],"strategies":[124],"are":[125,144],"explored":[126],"means":[129],"design":[131],"task-aware,":[132],"hardware-adaptive":[133],"architectures":[134],"that":[135],"LLM":[137],"efficiency.":[139],"Hybrid":[140],"adaptive":[142],"methods":[143],"also":[145,168],"examined":[146],"dynamically":[148],"optimize":[149],"computational":[150],"efficiency":[151],"across":[152,202],"diverse":[153],"scenarios.":[155],"not":[158],"only":[159],"synthesizes":[160],"recent":[161],"advancements":[162],"identifies":[164],"open":[165],"problems":[166],"but":[167],"proposes":[169],"structured":[171],"research":[172,190],"roadmap":[173],"guide":[175],"the":[176,186],"development":[177],"efficient,":[179],"scalable,":[180],"equitable":[182],"LLMs.":[183],"By":[184],"bridging":[185],"gap":[187],"between":[188],"deployment,":[193],"this":[194],"offers":[196],"actionable":[197],"insights":[198],"for":[199],"optimizing":[200],"LLMs":[201],"range":[204],"environments,":[206],"mobile":[208],"devices":[209],"large-scale":[211],"cloud":[212],"infrastructures.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
