{"id":"https://openalex.org/W6948041793","doi":"https://doi.org/10.48550/arxiv.2503.05936","title":"CASP: Compression of Large Multimodal Models Based on Attention Sparsity","display_name":"CASP: Compression of Large Multimodal Models Based on Attention Sparsity","publication_year":2025,"publication_date":"2025-03-07","ids":{"openalex":"https://openalex.org/W6948041793","doi":"https://doi.org/10.48550/arxiv.2503.05936"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2503.05936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.05936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2503.05936","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Gholami, Mohsen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gholami, Mohsen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Akbari, Mohammad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Akbari, Mohammad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cannons, Kevin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cannons, Kevin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Zhang, Yong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.04960000142455101,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.04960000142455101,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.043800000101327896,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.042500000447034836,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.732699990272522},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5613999962806702},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.532800018787384},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4146000146865845},{"id":"https://openalex.org/keywords/compression-ratio","display_name":"Compression ratio","score":0.4050000011920929},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.38420000672340393},{"id":"https://openalex.org/keywords/image-compression","display_name":"Image compression","score":0.34529998898506165}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.732699990272522},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7035999894142151},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5613999962806702},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.532800018787384},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49950000643730164},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4146000146865845},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.4050000011920929},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4011000096797943},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.38420000672340393},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.34529998898506165},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.3334999978542328},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C22789450","wikidata":"https://www.wikidata.org/wiki/Q420904","display_name":"Singular value decomposition","level":2,"score":0.2556000053882599}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2503.05936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.05936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2503.05936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.05936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"work,":[2],"we":[3,77],"propose":[4],"an":[5,21,108,130],"extreme":[6],"compression":[7,24,32,65,82],"technique":[8,83,119],"for":[9,26,33,84],"Large":[10,27],"Multimodal":[11],"Models":[12,29],"(LMMs).":[13],"While":[14],"previous":[15],"studies":[16],"have":[17],"explored":[18],"quantization":[19,102,118,124],"as":[20],"efficient":[22],"post-training":[23],"method":[25],"Language":[28],"(LLMs),":[30],"low-bit":[31],"multimodal":[34,44],"models":[35,45],"remains":[36],"under-explored.":[37],"The":[38],"redundant":[39],"nature":[40],"of":[41,67,132],"inputs":[42],"in":[43,47],"results":[46],"a":[48,80,89],"highly":[49],"sparse":[50],"attention":[51,60],"matrix.":[52],"We":[53],"theoretically":[54],"and":[55,70,96,120,127,136],"experimentally":[56],"demonstrate":[57],"that":[58],"the":[59,64,68,94],"matrix's":[61],"sparsity":[62],"bounds":[63],"error":[66],"Query":[69,95],"Key":[71,97],"weight":[72,98],"matrices.":[73],"Based":[74],"on":[75,93,107,134],"this,":[76],"introduce":[78],"CASP,":[79],"model":[81],"LMMs.":[85],"Our":[86],"approach":[87],"performs":[88],"data-aware":[90],"low-rank":[91],"decomposition":[92],"matrix,":[99],"followed":[100],"by":[101,129],"across":[103],"all":[104],"layers":[105],"based":[106],"optimal":[109],"bit":[110],"allocation":[111],"process.":[112],"CASP":[113],"is":[114],"compatible":[115],"with":[116],"any":[117],"enhances":[121],"state-of-the-art":[122],"2-bit":[123],"methods":[125],"(AQLM":[126],"QuIP#)":[128],"average":[131],"21%":[133],"image-":[135],"video-language":[137],"benchmarks.":[138]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
