{"id":"https://openalex.org/W7154758010","doi":"https://doi.org/10.48550/arxiv.2604.15153","title":"Compressing Sequences in the Latent Embedding Space: $K$-Token Merging for Large Language Models","display_name":"Compressing Sequences in the Latent Embedding Space: $K$-Token Merging for Large Language Models","publication_year":2026,"publication_date":"2026-04-16","ids":{"openalex":"https://openalex.org/W7154758010","doi":"https://doi.org/10.48550/arxiv.2604.15153"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.15153","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15153","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.15153","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133831030","display_name":"Zihao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Zihao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078256928","display_name":"John Harvill","orcid":"https://orcid.org/0000-0003-3633-6756"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Harvill, John","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017213607","display_name":"Ziwei Fan","orcid":"https://orcid.org/0000-0001-5445-2203"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Ziwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133895176","display_name":"Yizhou Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yizhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133897863","display_name":"Hao Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133828998","display_name":"Hao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5133831030"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3763999938964844,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3763999938964844,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.11060000211000443,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.049800001084804535,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7685999870300293},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7143999934196472},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.572700023651123},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5393999814987183},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.517799973487854},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5002999901771545},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4936000108718872}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7685999870300293},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7447999715805054},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7143999934196472},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.572700023651123},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5393999814987183},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.517799973487854},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5002999901771545},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4936000108718872},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4690999984741211},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.45750001072883606},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4381999969482422},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4090999960899353},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3977000117301941},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3467000126838684},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.34310001134872437},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2849000096321106},{"id":"https://openalex.org/C2776848632","wikidata":"https://www.wikidata.org/wiki/Q853463","display_name":"Clipping (morphology)","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.25999999046325684},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2572999894618988},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.15153","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15153","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.15153","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15153","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5083916187286377}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"incur":[4],"significant":[5],"computational":[6],"and":[7,46,108],"memory":[8],"costs":[9],"when":[10],"processing":[11],"long":[12],"prompts,":[13],"as":[14],"full":[15],"self-attention":[16],"scales":[17],"quadratically":[18],"with":[19,132],"input":[20,129],"length.":[21],"Token":[22],"compression":[23,63],"aims":[24],"to":[25,127],"address":[26],"this":[27,55],"challenge":[28],"by":[29,87],"reducing":[30],"the":[31,50,95,118],"number":[32],"of":[33,70,121],"tokens":[34],"representing":[35],"inputs.":[36],"However,":[37],"existing":[38],"prompt-compression":[39],"approaches":[40],"primarily":[41],"operate":[42],"in":[43,49,94],"token":[44,72],"space":[45],"overlook":[47],"inefficiencies":[48],"latent":[51],"embedding":[52,77],"space.":[53],"In":[54],"paper,":[56],"we":[57],"propose":[58],"K-Token":[59,114],"Merging,":[60],"a":[61,75,79,88],"latent-space":[62],"framework":[64],"that":[65,113],"merges":[66],"each":[67],"contiguous":[68],"block":[69],"K":[71],"embeddings":[73],"into":[74],"single":[76],"via":[78],"lightweight":[80],"encoder.":[81],"The":[82],"compressed":[83],"sequence":[84],"is":[85,137],"processed":[86],"LoRA-adapted":[89],"LLM,":[90],"while":[91],"generation":[92],"remains":[93],"original":[96],"vocabulary.":[97],"Experiments":[98],"on":[99,117],"structural":[100],"reasoning":[101],"(Textualized":[102],"Tree),":[103],"sentiment":[104],"classification":[105],"(Amazon":[106],"Reviews),":[107],"code":[109],"editing":[110],"(CommitPackFT)":[111],"show":[112],"Merging":[115],"lies":[116],"Pareto":[119],"frontier":[120],"performance":[122,134],"vs.":[123],"compression,":[124],"achieving":[125],"up":[126],"75%":[128],"length":[130],"reduction":[131],"minimal":[133],"degradation.":[135],"Code":[136],"available":[138],"at":[139],"https://github.com/shsjxzh/K-Token-Merging.":[140]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-18T00:00:00"}
