{"id":"https://openalex.org/W7154383749","doi":"https://doi.org/10.48550/arxiv.2604.09595","title":"Why Smaller Is Slower? Dimensional Misalignment in Compressed LLMs","display_name":"Why Smaller Is Slower? Dimensional Misalignment in Compressed LLMs","publication_year":2026,"publication_date":"2026-03-05","ids":{"openalex":"https://openalex.org/W7154383749","doi":"https://doi.org/10.48550/arxiv.2604.09595"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.09595","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.09595","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.09595","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080310755","display_name":"Jihao Xin","orcid":"https://orcid.org/0000-0002-8117-9422"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xin, Jihao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133612824","display_name":"Tian Lyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyu, Tian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133569566","display_name":"Qilong Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Qilong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133563873","display_name":"Kesen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Kesen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133569617","display_name":"Marco Canini","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Canini, Marco","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080310755"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.642300009727478,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.642300009727478,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.09220000356435776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.039900001138448715,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.6586999893188477},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6220999956130981},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.5701000094413757},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.552299976348877},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5375000238418579},{"id":"https://openalex.org/keywords/knapsack-problem","display_name":"Knapsack problem","score":0.5077999830245972},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.4221999943256378},{"id":"https://openalex.org/keywords/uncompressed-video","display_name":"Uncompressed video","score":0.4189000129699707}],"concepts":[{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.6586999893188477},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6220999956130981},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5916000008583069},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.5701000094413757},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.552299976348877},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5375000238418579},{"id":"https://openalex.org/C113138325","wikidata":"https://www.wikidata.org/wiki/Q864457","display_name":"Knapsack problem","level":2,"score":0.5077999830245972},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49900001287460327},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.4221999943256378},{"id":"https://openalex.org/C162478608","wikidata":"https://www.wikidata.org/wiki/Q4011369","display_name":"Uncompressed video","level":4,"score":0.4189000129699707},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C131097465","wikidata":"https://www.wikidata.org/wiki/Q178898","display_name":"Gas compressor","level":2,"score":0.39329999685287476},{"id":"https://openalex.org/C22789450","wikidata":"https://www.wikidata.org/wiki/Q420904","display_name":"Singular value decomposition","level":2,"score":0.37459999322891235},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.3709000051021576},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3393000066280365},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.32899999618530273},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.29490000009536743},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2806999981403351},{"id":"https://openalex.org/C84945661","wikidata":"https://www.wikidata.org/wiki/Q7366567","display_name":"Root cause","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.27059999108314514},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26159998774528503},{"id":"https://openalex.org/C190694206","wikidata":"https://www.wikidata.org/wiki/Q3276654","display_name":"Polygon (computer graphics)","level":3,"score":0.2612000107765198},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.09595","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.09595","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.09595","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.09595","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Post-training":[0],"compression":[1,94],"reduces":[2],"LLM":[3],"parameter":[4,112],"counts":[5],"but":[6],"often":[7],"produces":[8],"irregular":[9],"tensor":[10],"dimensions":[11,50,84,104],"that":[12,42,96],"degrade":[13],"GPU":[14,55],"performance":[15],"--":[16],"a":[17,25,92],"phenomenon":[18],"we":[19],"call":[20],"\\emph{dimensional":[21],"misalignment}.":[22],"We":[23,87,114],"present":[24],"full-stack":[26],"analysis":[27],"tracing":[28],"root":[29],"causes":[30],"at":[31],"three":[32],"levels:":[33],"framework,":[34],"library,":[35],"and":[36,101,121,126],"hardware.":[37],"The":[38],"key":[39],"insight":[40],"is":[41],"model":[43,134],"inference":[44],"becomes":[45],"slower":[46],"because":[47,80],"the":[48,54,77,110],"resulting":[49],"are":[51,85],"unfriendly":[52],"with":[53,62,119],"execution":[56],"stack.":[57],"For":[58],"example,":[59],"compressing":[60],"Llama-3-8B":[61,118],"activation-aware":[63],"singular":[64],"value":[65],"decomposition":[66],"(ASVD)":[67],"has":[68],"15\\%":[69],"fewer":[70],"parameters":[71],"yet":[72],"runs":[73],"no":[74],"faster":[75],"than":[76],"uncompressed":[78],"baseline,":[79],"95\\%":[81],"of":[82],"its":[83],"misaligned.":[86],"propose":[88],"\\textbf{GAC}":[89],"(GPU-Aligned":[90],"Compression),":[91],"new":[93],"paradigm":[95],"wraps":[97],"any":[98],"dimension-reducing":[99],"compressor":[100],"re-selects":[102],"hardware-aligned":[103],"via":[105],"multi-choice":[106],"knapsack":[107],"optimization":[108],"under":[109],"same":[111],"budget.":[113],"evaluate":[115],"GAC":[116],"on":[117],"ASVD":[120],"LLM-Pruner,":[122],"achieving":[123],"100\\%":[124],"alignment":[125],"recovering":[127],"up":[128],"to":[129],"1.5$\\times$":[130],"speedup":[131],"while":[132],"preserving":[133],"quality.":[135]},"counts_by_year":[],"updated_date":"2026-04-15T06:04:33.058270","created_date":"2026-04-15T00:00:00"}
