{"id":"https://openalex.org/W4410582295","doi":"https://doi.org/10.23919/date64628.2025.10992781","title":"EVASION: Efficient KV CAche CompreSsion vIa PrOduct QuaNtization","display_name":"EVASION: Efficient KV CAche CompreSsion vIa PrOduct QuaNtization","publication_year":2025,"publication_date":"2025-03-31","ids":{"openalex":"https://openalex.org/W4410582295","doi":"https://doi.org/10.23919/date64628.2025.10992781"},"language":"en","primary_location":{"id":"doi:10.23919/date64628.2025.10992781","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date64628.2025.10992781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107269495","display_name":"Zongwu Wang","orcid":"https://orcid.org/0009-0003-2157-4927"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zongwu Wang","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017670541","display_name":"Fangxin Liu","orcid":"https://orcid.org/0000-0002-8769-293X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangxin Liu","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018569941","display_name":"Peng Xu","orcid":"https://orcid.org/0000-0003-0822-700X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Xu","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060999547","display_name":"Qingxiao Sun","orcid":"https://orcid.org/0000-0003-2927-362X"},"institutions":[{"id":"https://openalex.org/I204553293","display_name":"China University of Petroleum, Beijing","ror":"https://ror.org/041qf4r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I204553293"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingxiao Sun","raw_affiliation_strings":["China University of Petroleum-Beijing,SSSLab,Dept. of CST,China"],"affiliations":[{"raw_affiliation_string":"China University of Petroleum-Beijing,SSSLab,Dept. of CST,China","institution_ids":["https://openalex.org/I204553293"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071556047","display_name":"Jijun Zhao","orcid":"https://orcid.org/0000-0002-3263-7159"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junping Zhao","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053801300","display_name":"Li Jiang","orcid":"https://orcid.org/0000-0002-7353-8798"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Jiang","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5107269495"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":1.702,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.84833345,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.04399999976158142,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.04399999976158142,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13370","display_name":"Diverse Scientific and Economic Studies","score":0.015799999237060547,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.013799999840557575,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.613821268081665},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5816690921783447},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.471243679523468},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.4332989454269409},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.2907772660255432},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.1417081654071808},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11220479011535645},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.08685114979743958}],"concepts":[{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.613821268081665},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5816690921783447},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.471243679523468},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.4332989454269409},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2907772660255432},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.1417081654071808},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11220479011535645},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.08685114979743958}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/date64628.2025.10992781","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date64628.2025.10992781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5952030521","display_name":null,"funder_award_id":"62402311","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8995253305","display_name":null,"funder_award_id":"24ZR1433700","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2124509324","https://openalex.org/W4393406920","https://openalex.org/W4404134117","https://openalex.org/W6856685690","https://openalex.org/W6862025885"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,16],"(LLMs)":[3],"are":[4],"increasingly":[5],"utilized":[6],"for":[7,108,196],"complex":[8],"tasks":[9],"requiring":[10],"longer":[11],"context":[12,76],"lengths,":[13],"with":[14,75],"some":[15],"supporting":[17],"up":[18],"to":[19,88,112],"128K":[20],"or":[21],"1M":[22],"tokens.":[23],"This":[24],"trend,":[25],"however,":[26],"presents":[27],"significant":[28,122],"challenges":[29],"in":[30,39,129],"inference":[31,42,194,207],"speed":[32],"and":[33,96,119,202,222],"memory":[34,70,97],"management.":[35],"The":[36],"primary":[37],"bottleneck":[38],"long-context":[40],"LLM":[41,80,94],"is":[43],"the":[44,90,164],"quadratic":[45],"computational":[46],"complexity":[47],"of":[48,127,159,166],"attention":[49],"mechanisms,":[50],"causing":[51,121],"substantial":[52],"slowdowns":[53],"as":[54,84],"sequence":[55],"length":[56],"increases.":[57],"KV":[58,109,130,148,160],"cache":[59,149,161],"mechanism":[60],"alleviates":[61],"this":[62,137],"issue":[63],"by":[64],"storing":[65],"pre-computed":[66],"data,":[67],"but":[68],"introduces":[69],"requirements":[71],"that":[72,198,213],"scale":[73],"linearly":[74],"length,":[77],"hindering":[78],"efficient":[79],"deployment.":[81],"Quantization":[82],"emerges":[83],"a":[85,142,156,173,191],"promising":[86],"approach":[87],"address":[89],"widening":[91],"gap":[92],"between":[93],"size":[95],"capacity.":[98],"However,":[99],"traditional":[100],"quantization":[101,118,144,168,175,219],"schemes":[102],"often":[103],"yield":[104],"suboptimal":[105],"compression":[106],"results":[107,211],"caches":[110],"due":[111],"two":[113],"key":[114],"factors:":[115],"i)":[116],"On-the-fly":[117],"de-quantization,":[120],"performance":[123],"overhead;":[124],"ii)":[125],"Prevalence":[126],"outliers":[128],"values,":[131],"challenging":[132],"low-bitwidth":[133,147],"uniform":[134],"quantization.":[135,152],"To":[136],"end,":[138],"we":[139,154,171,189],"propose":[140],"EVASION,":[141],"novel":[143],"framework":[145,195],"achieving":[146],"through":[150],"product":[151,179],"First,":[153],"conduct":[155],"thorough":[157],"analysis":[158],"distribution,":[162],"revealing":[163],"limitations":[165],"existing":[167],"schemes.":[169],"Second,":[170],"introduce":[172],"non-uniform":[174],"algorithm":[176],"based":[177],"on":[178],"quantization,":[180,204],"which":[181],"efficiently":[182],"compresses":[183],"data":[184],"while":[185],"preserving":[186],"accuracy.":[187],"Third,":[188],"develop":[190],"high-performance":[192],"GPU":[193],"EVASION":[197,214],"leverages":[199],"sparse":[200],"computation":[201],"asynchronous":[203],"significantly":[205],"enhancing":[206],"speed.":[208],"Comprehensive":[209],"evaluation":[210],"demonstrate":[212],"can":[215],"achieve":[216],"4":[217],"bits":[218],"trivial":[220],"perplexity":[221],"accuracy":[223],"loss.":[224]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-03T08:47:05.690250","created_date":"2025-10-10T00:00:00"}
