{"id":"https://openalex.org/W7140200604","doi":"https://doi.org/10.48550/arxiv.2603.21701","title":"Rethinking Token Reduction for Large Vision-Language Models","display_name":"Rethinking Token Reduction for Large Vision-Language Models","publication_year":2026,"publication_date":"2026-03-23","ids":{"openalex":"https://openalex.org/W7140200604","doi":"https://doi.org/10.48550/arxiv.2603.21701"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.21701","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21701","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.21701","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wang, Yi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhang, Haofei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Haofei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Huang, Qihan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Qihan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cao, Anda","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Anda","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Fang, Gongfan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Gongfan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jin, Xuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Xuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Song, Jie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Jie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Song, Mingli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Mingli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Wang, Xinchao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xinchao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9739000201225281,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9739000201225281,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.004999999888241291,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.003100000089034438,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.7027999758720398},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.6812999844551086},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6644999980926514},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6568999886512756},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6373000144958496},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5490999817848206},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.41769999265670776},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.3382999897003174}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7856000065803528},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.7027999758720398},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.6812999844551086},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6644999980926514},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6568999886512756},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6373000144958496},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5490999817848206},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5353000164031982},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4740000069141388},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.41769999265670776},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3382999897003174},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.33149999380111694},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3059999942779541},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.30219998955726624},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2590000033378601}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.21701","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21701","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.21701","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21701","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Vision-Language":[1],"Models":[2],"(LVLMs)":[3],"excel":[4],"in":[5],"visual":[6,13],"understanding":[7],"and":[8,56,84,147,178],"reasoning,":[9],"but":[10],"the":[11,37,80,125],"excessive":[12],"tokens":[14],"lead":[15],"to":[16,59,97,110],"high":[17],"inference":[18],"costs.":[19,172],"Although":[20],"recent":[21],"token":[22,134],"reduction":[23,65,103,135],"methods":[24],"mitigate":[25],"this":[26,114,155],"issue,":[27],"they":[28],"mainly":[29],"target":[30],"single-turn":[31],"Visual":[32],"Question":[33],"Answering":[34],"(VQA),":[35],"leaving":[36],"more":[38],"practical":[39],"multi-turn":[40,98],"VQA":[41],"(MT-VQA)":[42],"scenario":[43],"largely":[44],"unexplored.":[45],"MT-VQA":[46,176],"introduces":[47],"additional":[48],"challenges,":[49],"as":[50,106,136,145],"subsequent":[51,89],"questions":[52],"are":[53],"unknown":[54],"beforehand":[55],"may":[57],"refer":[58],"arbitrary":[60],"image":[61],"regions,":[62],"making":[63],"existing":[64,142],"strategies":[66],"ineffective.":[67],"Specifically,":[68],"current":[69],"approaches":[70],"fall":[71],"into":[72,149],"two":[73],"categories:":[74],"prompt-dependent":[75],"methods,":[76],"which":[77],"bias":[78],"toward":[79],"initial":[81],"text":[82],"prompt":[83],"discard":[85],"information":[86],"useful":[87],"for":[88],"turns;":[90],"prompt-agnostic":[91,120],"ones,":[92],"which,":[93],"though":[94],"technically":[95],"applicable":[96],"settings,":[99],"rely":[100],"on":[101,175],"heuristic":[102,128],"metrics":[104],"such":[105,144],"attention":[107],"scores,":[108],"leading":[109],"suboptimal":[111],"performance.":[112],"In":[113],"paper,":[115],"we":[116,157],"propose":[117],"a":[118,137,150,159],"learning-based":[119],"method,":[121],"termed":[122],"MetaCompress,":[123],"overcoming":[124],"limitations":[126],"of":[127,164],"designs.":[129],"We":[130],"begin":[131],"by":[132],"formulating":[133],"learnable":[138],"compression":[139,167],"mapping,":[140],"unifying":[141],"formats":[143],"pruning":[146],"merging":[148],"single":[151],"learning":[152,165],"objective.":[153],"Upon":[154],"formulation,":[156],"introduce":[158],"data-efficient":[160],"training":[161],"paradigm":[162],"capable":[163],"optimal":[166],"mappings":[168],"with":[169],"limited":[170],"computational":[171],"Extensive":[173],"experiments":[174],"benchmarks":[177],"across":[179,194],"multiple":[180],"LVLM":[181],"architectures":[182],"demonstrate":[183],"that":[184],"MetaCompress":[185],"achieves":[186],"superior":[187],"efficiency-accuracy":[188],"trade-offs":[189],"while":[190],"maintaining":[191],"strong":[192],"generalization":[193],"dialogue":[195],"turns.":[196],"Our":[197],"code":[198],"is":[199],"available":[200],"at":[201],"https://github.com/MArSha1147/MetaCompress.":[202]},"counts_by_year":[],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2026-03-25T00:00:00"}
