{"id":"https://openalex.org/W4416036941","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.165","title":"TokenSkip: Controllable Chain-of-Thought Compression in LLMs","display_name":"TokenSkip: Controllable Chain-of-Thought Compression in LLMs","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036941","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.165"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.165","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.165","pdf_url":"https://aclanthology.org/2025.emnlp-main.165.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.165.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010537052","display_name":"Hanbing Xia","orcid":"https://orcid.org/0000-0003-1815-1045"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Heming Xia","raw_affiliation_strings":["Department of Computing , The Hong Kong Polytechnic University University of Science and Technology of China"],"affiliations":[{"raw_affiliation_string":"Department of Computing , The Hong Kong Polytechnic University University of Science and Technology of China","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116335566","display_name":"Chak Tou Leong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chak Tou Leong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100368510","display_name":"Wenjie Wang","orcid":"https://orcid.org/0000-0001-6426-0416"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenjie Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101599249","display_name":"Yongqi Li","orcid":"https://orcid.org/0000-0002-6932-4228"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yongqi Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100408983","display_name":"Wenjie Li","orcid":"https://orcid.org/0000-0002-7360-8864"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Wenjie Li","raw_affiliation_strings":["Department of Computing , The Hong Kong Polytechnic University University of Science and Technology of China"],"affiliations":[{"raw_affiliation_string":"Department of Computing , The Hong Kong Polytechnic University University of Science and Technology of China","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5010537052"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":6.2297,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.96558948,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3351","last_page":"3363"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.09790000319480896,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.09790000319480896,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10138","display_name":"Network Traffic and Congestion Control","score":0.0812000036239624,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10742","display_name":"Peer-to-Peer Network Technologies","score":0.05920000001788139,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.4480000138282776},{"id":"https://openalex.org/keywords/welding","display_name":"Welding","score":0.30959999561309814},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.2766000032424927},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.26930001378059387}],"concepts":[{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.4480000138282776},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3716999888420105},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.3212999999523163},{"id":"https://openalex.org/C19474535","wikidata":"https://www.wikidata.org/wiki/Q131172","display_name":"Welding","level":2,"score":0.30959999561309814},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.26930001378059387},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.25679999589920044},{"id":"https://openalex.org/C199639397","wikidata":"https://www.wikidata.org/wiki/Q1788588","display_name":"Engineering drawing","level":1,"score":0.23720000684261322},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.2296999990940094}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.165","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.165","pdf_url":"https://aclanthology.org/2025.emnlp-main.165.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.165","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.165","pdf_url":"https://aclanthology.org/2025.emnlp-main.165.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322598","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036941.pdf","grobid_xml":"https://content.openalex.org/works/W4416036941.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Chain-of-Thought":[0],"(CoT)":[1],"has":[2],"been":[3],"proven":[4],"effective":[5,100],"in":[6,56,127,164],"enhancing":[7],"the":[8,27,42,65,75,123],"reasoning":[9,38,89,135,142],"capabilities":[10],"of":[11,29,45,78,125],"large":[12],"language":[13],"models":[14,119],"(LLMs).Recent":[15],"advancements,":[16],"such":[17],"as":[18],"OpenAI's":[19],"o1":[20],"and":[21,83,120,162],"DeepSeek-R1,":[22],"suggest":[23],"that":[24,85,102],"scaling":[25],"up":[26],"length":[28],"CoT":[30,49,66,81,114,129],"sequences":[31],"during":[32],"inference":[33,57],"could":[34],"further":[35],"boost":[36],"LLM":[37,46],"performance.However,":[39],"due":[40],"to":[41,52,88,105,139,148],"autoregressive":[43],"nature":[44],"decoding,":[47],"longer":[48],"outputs":[50,82],"lead":[51],"a":[53,97,155],"linear":[54],"increase":[55],"latency,":[58],"adversely":[59],"affecting":[60],"user":[61],"experience,":[62],"particularly":[63],"when":[64,137],"exceeds":[67],"10,000":[68],"tokens.To":[69],"address":[70],"this":[71,92],"limitation,":[72],"we":[73,94],"analyze":[74],"semantic":[76],"importance":[77],"tokens":[79,143],"within":[80],"reveal":[84],"their":[86],"contributions":[87],"vary.Building":[90],"on":[91,150],"insight,":[93],"propose":[95],"TokenSkip,":[96],"simple":[98],"yet":[99],"approach":[101],"enables":[103],"LLMs":[104],"selectively":[106],"skip":[107],"less":[108,153],"important":[109],"tokens,":[110],"allowing":[111],"for":[112],"controllable":[113],"compression.Extensive":[115],"experiments":[116],"across":[117],"various":[118],"tasks":[121],"demonstrate":[122],"effectiveness":[124],"TokenSkip":[126],"reducing":[128],"token":[130],"usage":[131],"while":[132],"preserving":[133],"strong":[134],"performance.Notably,":[136],"applied":[138],"Qwen2.5-14B-Instruct,TokenSkip":[140],"reduces":[141],"by":[144],"40%":[145],"(from":[146],"313":[147],"181)":[149],"GSM8K,":[151],"with":[152],"than":[154],"0.4%":[156],"performance":[157],"drop.We":[158],"release":[159],"our":[160],"code":[161],"checkpoints":[163],"https:":[165],"//github.com/hemingkx/TokenSkip.":[166]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-11-08T00:00:00"}
