{"id":"https://openalex.org/W7148317770","doi":"https://doi.org/10.48550/arxiv.2604.00757","title":"IWP: Token Pruning as Implicit Weight Pruning in Large Vision Language Models","display_name":"IWP: Token Pruning as Implicit Weight Pruning in Large Vision Language Models","publication_year":2026,"publication_date":"2026-04-01","ids":{"openalex":"https://openalex.org/W7148317770","doi":"https://doi.org/10.48550/arxiv.2604.00757"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00757","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00757","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00757","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132814694","display_name":"Dong-Jae Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Dong-Jae","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132829607","display_name":"Sunghyun Baek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baek, Sunghyun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132809859","display_name":"Junmo Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Junmo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9025999903678894,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9025999903678894,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.03620000183582306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.01720000058412552,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.7483999729156494},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5454999804496765},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5307000279426575},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.5223000049591064},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.47690001130104065},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.44029998779296875},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.43849998712539673},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.42149999737739563},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.3939000070095062}],"concepts":[{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.7483999729156494},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6819000244140625},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5454999804496765},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5307000279426575},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.5223000049591064},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.47690001130104065},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47110000252723694},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.44029998779296875},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.43849998712539673},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.42149999737739563},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.3939000070095062},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.3763999938964844},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.3750999867916107},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3686000108718872},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3474000096321106},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3384999930858612},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3319000005722046},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3050999939441681},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.28850001096725464},{"id":"https://openalex.org/C4069607","wikidata":"https://www.wikidata.org/wiki/Q868732","display_name":"Aliasing","level":3,"score":0.28790000081062317},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.2815000116825104},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27889999747276306},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2770000100135803},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00757","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00757","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00757","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00757","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Vision":[1],"Language":[2],"Models":[3],"show":[4],"impressive":[5],"performance":[6,163],"across":[7],"image":[8],"and":[9,132,164],"video":[10],"understanding":[11],"tasks,":[12],"yet":[13],"their":[14],"computational":[15],"cost":[16],"grows":[17],"rapidly":[18],"with":[19,140],"the":[20,37,56,74,107,138,141],"number":[21],"of":[22,40,60,76,99],"visual":[23],"tokens.":[24],"Existing":[25],"token":[26,51],"pruning":[27,52,91,172],"methods":[28],"mitigate":[29],"this":[30,43,113],"issue":[31],"through":[32],"empirical":[33],"approaches":[34],"while":[35,166],"overlooking":[36],"internal":[38],"mechanism":[39],"attention.":[41,61],"In":[42],"paper,":[44],"we":[45,121,144],"propose":[46],"a":[47,84,123,128,158],"novel":[48,124],"training":[49],"free":[50],"framework":[53],"grounded":[54],"in":[55,119],"dual":[57,109],"form":[58],"perspective":[59,114,169],"We":[62],"reformulate":[63],"attention":[64,118],"as":[65],"an":[66,96],"implicit":[67],"linear":[68],"layer":[69],"whose":[70],"weight":[71,110],"matrix":[72],"is":[73],"sum":[75],"rank":[77,101],"1":[78,102],"outer":[79],"products,":[80],"each":[81],"generated":[82],"by":[83],"single":[85],"token's":[86,129],"key":[87],"value":[88],"pair.":[89],"Token":[90],"thus":[92],"reduces":[93],"to":[94,115],"selecting":[95],"optimal":[97],"subset":[98,139],"these":[100],"updates":[103],"that":[104,154],"best":[105],"approximates":[106],"original":[108],"matrix.":[111],"Extending":[112],"standard":[116],"softmax":[117],"LVLMs,":[120],"derive":[122],"metric":[125],"quantifying":[126],"both":[127],"information":[130,133],"magnitude":[131],"duplication.":[134],"To":[135],"efficiently":[136],"select":[137],"proposed":[142],"metric,":[143],"introduce":[145],"Progressive":[146],"Chunked":[147],"Maximal":[148],"Marginal":[149],"Relevance.":[150],"Extensive":[151],"experiments":[152],"demonstrate":[153],"our":[155],"method":[156],"achieves":[157],"better":[159],"trade":[160],"off":[161],"between":[162],"efficiency,":[165],"providing":[167],"another":[168],"on":[170],"existing":[171],"approaches.":[173]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-03T00:00:00"}
