{"id":"https://openalex.org/W4416252170","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227279","title":"Semantic Retention and Extreme Compression in LLMs: Can We Have Both?","display_name":"Semantic Retention and Extreme Compression in LLMs: Can We Have Both?","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252170","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227279"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11227279","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227279","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120651020","display_name":"Stanislas Laborde","orcid":null},"institutions":[{"id":"https://openalex.org/I35298706","display_name":"ESIEA University","ror":"https://ror.org/00g6cx256","country_code":"FR","type":"education","lineage":["https://openalex.org/I35298706"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Stanislas Laborde","raw_affiliation_strings":["ESIEA,Learning, Data and Robotics (LDR) ESIEA Lab,Paris,France"],"affiliations":[{"raw_affiliation_string":"ESIEA,Learning, Data and Robotics (LDR) ESIEA Lab,Paris,France","institution_ids":["https://openalex.org/I35298706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120651021","display_name":"Martin Cousseau","orcid":null},"institutions":[{"id":"https://openalex.org/I35298706","display_name":"ESIEA University","ror":"https://ror.org/00g6cx256","country_code":"FR","type":"education","lineage":["https://openalex.org/I35298706"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Martin Cousseau","raw_affiliation_strings":["ESIEA,Learning, Data and Robotics (LDR) ESIEA Lab,Paris,France"],"affiliations":[{"raw_affiliation_string":"ESIEA,Learning, Data and Robotics (LDR) ESIEA Lab,Paris,France","institution_ids":["https://openalex.org/I35298706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085526718","display_name":"Antoun Yaacoub","orcid":"https://orcid.org/0009-0007-3698-9227"},"institutions":[{"id":"https://openalex.org/I35298706","display_name":"ESIEA University","ror":"https://ror.org/00g6cx256","country_code":"FR","type":"education","lineage":["https://openalex.org/I35298706"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Antoun Yaacoub","raw_affiliation_strings":["ESIEA,Learning, Data and Robotics (LDR) ESIEA Lab,Paris,France"],"affiliations":[{"raw_affiliation_string":"ESIEA,Learning, Data and Robotics (LDR) ESIEA Lab,Paris,France","institution_ids":["https://openalex.org/I35298706"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045983846","display_name":"Lionel Pr\u00e9vost","orcid":null},"institutions":[{"id":"https://openalex.org/I35298706","display_name":"ESIEA University","ror":"https://ror.org/00g6cx256","country_code":"FR","type":"education","lineage":["https://openalex.org/I35298706"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Lionel Prevost","raw_affiliation_strings":["ESIEA,Learning, Data and Robotics (LDR) ESIEA Lab,Paris,France"],"affiliations":[{"raw_affiliation_string":"ESIEA,Learning, Data and Robotics (LDR) ESIEA Lab,Paris,France","institution_ids":["https://openalex.org/I35298706"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5120651020"],"corresponding_institution_ids":["https://openalex.org/I35298706"],"apc_list":null,"apc_paid":null,"fwci":2.7163,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.9283683,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.22110000252723694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.22110000252723694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.20489999651908875,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.06239999830722809,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6434999704360962},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5583000183105469},{"id":"https://openalex.org/keywords/data-compression-ratio","display_name":"Data compression ratio","score":0.5497999787330627},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5479999780654907},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.4796999990940094},{"id":"https://openalex.org/keywords/image-compression","display_name":"Image compression","score":0.41940000653266907},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.3926999866962433}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7125999927520752},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6434999704360962},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5583000183105469},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.5497999787330627},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5479999780654907},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.4796999990940094},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4666999876499176},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.41940000653266907},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.36000001430511475},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3479999899864197},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3472999930381775},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.3337000012397766},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.32190001010894775},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3107999861240387},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C202708506","wikidata":"https://www.wikidata.org/wiki/Q7449050","display_name":"Semantic compression","level":5,"score":0.2711000144481659},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2612000107765198},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2567000091075897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11227279","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227279","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W4385571157","https://openalex.org/W4391136507","https://openalex.org/W4393147284","https://openalex.org/W4406650295","https://openalex.org/W4415954773"],"related_works":[],"abstract_inverted_index":{"The":[0],"exponential":[1],"growth":[2],"in":[3,63],"Large":[4],"Language":[5],"Model":[6],"(LLM)":[7],"deployment":[8],"has":[9],"intensified":[10],"the":[11,61,78,89,98,123],"need":[12],"for":[13],"efficient":[14],"model":[15,92,121],"compression":[16,43,93,126],"techniques":[17],"to":[18,57,117],"reduce":[19],"computational":[20],"and":[21,26,44,49,76,94],"memory":[22],"costs.":[23],"While":[24],"pruning":[25,48],"quantization":[27,50],"have":[28],"shown":[29],"promise,":[30],"their":[31],"combined":[32],"potential":[33],"remains":[34],"largely":[35],"unexplored.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40,68],"examine":[41],"joint":[42],"how":[45],"strategically":[46],"combining":[47],"could":[51],"yield":[52],"superior":[53],"performance-to-compression":[54],"ratios":[55],"compared":[56,116],"single-method":[58],"approaches.":[59],"Recognizing":[60],"challenges":[62],"accurately":[64],"assessing":[65],"LLM":[66],"performance,":[67],"address":[69],"key":[70],"limitations":[71],"of":[72,100],"previous":[73],"evaluation":[74],"frameworks":[75],"introduce":[77],"Semantic":[79],"Retention":[80],"Compression":[81],"Rate":[82],"(SrCr),":[83],"a":[84,112],"novel":[85],"metric":[86],"that":[87,105],"quantifies":[88],"trade-off":[90],"between":[91],"semantic":[95],"preservation,":[96],"facilitating":[97],"optimization":[99],"pruning-quantization":[101],"configurations.":[102],"Experiments":[103],"demonstrate":[104],"our":[106],"recommended":[107],"combination":[108],"achieves,":[109],"on":[110],"average,":[111],"20%":[113],"performance":[114],"increase":[115],"an":[118],"equivalent":[119],"quantization-only":[120],"at":[122],"same":[124],"theoretical":[125],"rate.":[127]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-11-14T00:00:00"}
