{"id":"https://openalex.org/W7164828888","doi":"https://doi.org/10.1145/3805622.3810733","title":"SHARP: Semantic Head-Aware Representation Pruning for Efficient MLLMs","display_name":"SHARP: Semantic Head-Aware Representation Pruning for Efficient MLLMs","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164828888","doi":"https://doi.org/10.1145/3805622.3810733"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810733","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810733","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810733","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087910125","display_name":"H F","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haifeng Ma","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China and Pengcheng Laboratory, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0008-9658-224X","affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China and Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025239624","display_name":"Mingyue Guo","orcid":"https://orcid.org/0009-0005-2348-7530"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyue Guo","raw_affiliation_strings":["Pengcheng Laboratory, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0005-2348-7530","affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100680501","display_name":"Linhui Xiao","orcid":"https://orcid.org/0000-0003-2592-5264"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linhui Xiao","raw_affiliation_strings":["Pengcheng Laboratory, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-2592-5264","affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101092469","display_name":"Qingfang Zheng","orcid":"https://orcid.org/0009-0006-7568-9318"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingfang Zheng","raw_affiliation_strings":["Pengcheng Laboratory, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0006-7568-9318","affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028597017","display_name":"Qingming Huang","orcid":"https://orcid.org/0000-0001-7542-296X"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingming Huang","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China and Pengcheng Laboratory, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-7542-296X","affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China and Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93636862,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1701","last_page":"1705"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5180000066757202,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5180000066757202,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.15360000729560852,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.09179999679327011,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.8295000195503235},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7572000026702881},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7409999966621399},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6528000235557556},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5303000211715698},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.412200003862381},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3806999921798706},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.32030001282691956}],"concepts":[{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.8295000195503235},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8187999725341797},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7572000026702881},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7409999966621399},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6528000235557556},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6100999712944031},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5303000211715698},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4595000147819519},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43799999356269836},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.412200003862381},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3806999921798706},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C59415355","wikidata":"https://www.wikidata.org/wiki/Q3484781","display_name":"Text simplification","level":3,"score":0.2824000120162964},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810733","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810733","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810733","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810733","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.40345752239227295}],"awards":[{"id":"https://openalex.org/G2973104440","display_name":null,"funder_award_id":"PCL2025A14","funder_id":"https://openalex.org/F4320318558","funder_display_name":"Peng Cheng Laboratory"},{"id":"https://openalex.org/G3840657713","display_name":null,"funder_award_id":"62441232","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4199693610","display_name":null,"funder_award_id":"62236008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6656811634","display_name":null,"funder_award_id":"62521007","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320318558","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2963518342","https://openalex.org/W2963622213","https://openalex.org/W2979382951","https://openalex.org/W4402727764","https://openalex.org/W4402754149","https://openalex.org/W4412888011","https://openalex.org/W4412888444","https://openalex.org/W4413156814","https://openalex.org/W4417300640","https://openalex.org/W7133223072","https://openalex.org/W7134164008"],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"large":[1],"language":[2],"models":[3],"(MLLMs)":[4],"suffer":[5],"from":[6,60,98],"high":[7],"inference":[8,119,159],"costs,":[9],"where":[10],"visual":[11,57,100],"tokens":[12],"dominate":[13],"the":[14,21,52,56,99,150,158],"input":[15],"sequence,":[16],"often":[17,45],"exceeding":[18],"90%":[19],"of":[20,63,149,157],"total":[22],"length.":[23],"Current":[24],"acceleration":[25],"strategies":[26],"typically":[27],"employ":[28],"inference-time":[29],"token":[30,138],"pruning,":[31],"categorized":[32],"into":[33],"two":[34],"main":[35],"paradigms:":[36],"internal":[37],"LLM":[38],"pruning":[39,139],"and":[40],"pre-LLM":[41],"pruning.":[42],"The":[43,79],"former":[44],"undermines":[46],"hardware":[47],"optimizations":[48],"like":[49],"FlashAttention,":[50,145],"while":[51,153],"latter,":[53],"applied":[54],"after":[55],"encoder,":[58],"suffers":[59],"a":[61,72,103],"lack":[62],"textual":[64],"query":[65],"guidance.":[66],"In":[67],"this":[68],"study,":[69],"we":[70],"propose":[71],"Semantic":[73],"Head-Aware":[74],"Representation":[75],"Pruning":[76],"(SHARP)":[77],"framework.":[78],"key":[80],"idea":[81],"is":[82],"to":[83,110,136],"identify":[84],"pivotal":[85],"attention":[86],"heads":[87],"that":[88,128],"effectively":[89],"capture":[90],"cross-modal":[91],"alignment":[92,109],"by":[93],"measuring":[94],"text\u2013image":[95],"affinity":[96],"derived":[97],"encoder.":[101],"Such":[102],"design":[104],"not":[105],"only":[106,155],"leverages":[107],"semantic":[108],"preserve":[111],"task-relevant":[112],"information":[113],"but":[114],"also":[115],"ensures":[116],"significant":[117],"end-to-end":[118],"acceleration.":[120],"Experiments":[121],"on":[122,142],"widely":[123],"used":[124],"vision\u2013language":[125],"benchmarks":[126],"demonstrate":[127],"our":[129],"approach":[130],"achieves":[131],"superior":[132],"accuracy\u2013efficiency":[133],"trade-offs":[134],"compared":[135],"previous":[137],"strategies.":[140],"Notably,":[141],"LLaVA-1.5-7B":[143],"with":[144],"SHARP":[146],"retains":[147],"95%":[148],"original":[151],"performance":[152],"requiring":[154],"63%":[156],"latency,":[160],"underscoring":[161],"its":[162],"potential":[163],"for":[164],"deploying":[165],"efficient":[166],"MLLMs.":[167]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
