{"id":"https://openalex.org/W7158050086","doi":"https://doi.org/10.1145/3805621.3807608","title":"Why Smaller Is Slower? Dimensional Misalignment in Compressed LLMs","display_name":"Why Smaller Is Slower? Dimensional Misalignment in Compressed LLMs","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7158050086","doi":"https://doi.org/10.1145/3805621.3807608"},"language":null,"primary_location":{"id":"doi:10.1145/3805621.3807608","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807608","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805621.3807608","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080310755","display_name":"Jihao Xin","orcid":"https://orcid.org/0000-0002-8117-9422"},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":true,"raw_author_name":"Jihao Xin","raw_affiliation_strings":["KAUST, Thuwal, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0002-8117-9422","affiliations":[{"raw_affiliation_string":"KAUST, Thuwal, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133612824","display_name":"Tian Lyu","orcid":null},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Tian Lyu","raw_affiliation_strings":["KAUST, Thuwal, Saudi Arabia"],"raw_orcid":"https://orcid.org/0009-0000-1509-0051","affiliations":[{"raw_affiliation_string":"KAUST, Thuwal, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071141077","display_name":"Qilong Pan","orcid":"https://orcid.org/0000-0003-1964-8168"},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Qilong Pan","raw_affiliation_strings":["KAUST, Thuwal, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0003-1964-8168","affiliations":[{"raw_affiliation_string":"KAUST, Thuwal, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024001271","display_name":"Kesen Wang","orcid":"https://orcid.org/0000-0002-8820-1629"},"institutions":[{"id":"https://openalex.org/I120238654","display_name":"Saudi Electronic University","ror":"https://ror.org/05ndh7v49","country_code":"SA","type":"education","lineage":["https://openalex.org/I120238654"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Kesen Wang","raw_affiliation_strings":["HUMAIN, Riyadh, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0002-8820-1629","affiliations":[{"raw_affiliation_string":"HUMAIN, Riyadh, Saudi Arabia","institution_ids":["https://openalex.org/I120238654"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5134831846","display_name":"Marco Canini","orcid":"https://orcid.org/0000-0002-5051-4283"},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Marco Canini","raw_affiliation_strings":["KAUST, Thuwal, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0002-5051-4283","affiliations":[{"raw_affiliation_string":"KAUST, Thuwal, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080310755"],"corresponding_institution_ids":["https://openalex.org/I71920554"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.97047709,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"49","last_page":"59"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5446000099182129,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5446000099182129,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.060100000351667404,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.03519999980926514,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.590399980545044},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.5120000243186951},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4683000147342682},{"id":"https://openalex.org/keywords/singular-value-decomposition","display_name":"Singular value decomposition","score":0.45089998841285706},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.43639999628067017},{"id":"https://openalex.org/keywords/uncompressed-video","display_name":"Uncompressed video","score":0.38350000977516174},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.37380000948905945},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.36309999227523804}],"concepts":[{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.590399980545044},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.5120000243186951},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4799000024795532},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4683000147342682},{"id":"https://openalex.org/C22789450","wikidata":"https://www.wikidata.org/wiki/Q420904","display_name":"Singular value decomposition","level":2,"score":0.45089998841285706},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.43639999628067017},{"id":"https://openalex.org/C162478608","wikidata":"https://www.wikidata.org/wiki/Q4011369","display_name":"Uncompressed video","level":4,"score":0.38350000977516174},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.36309999227523804},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.35839998722076416},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35690000653266907},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3370000123977661},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32739999890327454},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C50335755","wikidata":"https://www.wikidata.org/wiki/Q483247","display_name":"Phenomenon","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.30079999566078186},{"id":"https://openalex.org/C84945661","wikidata":"https://www.wikidata.org/wiki/Q7366567","display_name":"Root cause","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.28139999508857727},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.2784000039100647},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2750999927520752},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C121483023","wikidata":"https://www.wikidata.org/wiki/Q7298343","display_name":"Ray tracing (physics)","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.26010000705718994},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.2531000077724457}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805621.3807608","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807608","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805621.3807608","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807608","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W2965862774","https://openalex.org/W4382202914","https://openalex.org/W4387321091","https://openalex.org/W4415797154"],"related_works":[],"abstract_inverted_index":{"Post-training":[0],"compression":[1],"reduces":[2],"LLM":[3],"parameter":[4],"counts":[5],"but":[6],"often":[7],"produces":[8],"irregular":[9],"tensor":[10],"dimensions":[11,48,82],"that":[12,40],"degrade":[13],"GPU":[14,53],"performance\u2014a":[15],"phenomenon":[16],"we":[17],"call":[18],"dimensional":[19],"misalignment.":[20],"We":[21],"present":[22],"a":[23],"full-stack":[24],"analysis":[25],"tracing":[26],"root":[27],"causes":[28],"at":[29],"three":[30],"levels:":[31],"framework,":[32],"library,":[33],"and":[34],"hardware.":[35],"The":[36],"key":[37],"insight":[38],"is":[39],"model":[41],"inference":[42],"becomes":[43],"slower":[44],"because":[45,78],"the":[46,52,75],"resulting":[47],"are":[49,83],"unfriendly":[50],"with":[51,60],"execution":[54],"stack.":[55],"For":[56],"example,":[57],"compressing":[58],"Llama-3-8B":[59],"activation-aware":[61],"singular":[62],"value":[63],"decomposition":[64],"(ASVD)":[65],"has":[66],"15%":[67],"fewer":[68],"parameters":[69],"yet":[70],"runs":[71],"no":[72],"faster":[73],"than":[74],"uncompressed":[76],"baseline,":[77],"95%":[79],"of":[80],"its":[81],"misaligned.":[84]},"counts_by_year":[],"updated_date":"2026-04-30T06:11:10.768123","created_date":"2026-04-30T00:00:00"}
