{"id":"https://openalex.org/W7153184966","doi":"https://doi.org/10.1109/access.2026.3680126","title":"QubitCache: Quantum-Inspired Probabilistic Attention Preservation for KV-Cache Compression","display_name":"QubitCache: Quantum-Inspired Probabilistic Attention Preservation for KV-Cache Compression","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7153184966","doi":"https://doi.org/10.1109/access.2026.3680126"},"language":"en","primary_location":{"id":"doi:10.1109/access.2026.3680126","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3680126","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3680126","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058572236","display_name":"Jieui Kang","orcid":"https://orcid.org/0009-0000-7691-0930"},"institutions":[{"id":"https://openalex.org/I138925566","display_name":"Ewha Womans University","ror":"https://ror.org/053fp5c05","country_code":"KR","type":"education","lineage":["https://openalex.org/I138925566"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jieui Kang","raw_affiliation_strings":["Artificial Intelligence Convergence, Ewha Womans University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0009-0000-7691-0930","affiliations":[{"raw_affiliation_string":"Artificial Intelligence Convergence, Ewha Womans University, Seoul, South Korea","institution_ids":["https://openalex.org/I138925566"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121959511","display_name":"Jaeyoung Choi","orcid":null},"institutions":[{"id":"https://openalex.org/I138925566","display_name":"Ewha Womans University","ror":"https://ror.org/053fp5c05","country_code":"KR","type":"education","lineage":["https://openalex.org/I138925566"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaeyoung Choi","raw_affiliation_strings":["Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea","institution_ids":["https://openalex.org/I138925566"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133386587","display_name":"Wonhui Noh","orcid":null},"institutions":[{"id":"https://openalex.org/I138925566","display_name":"Ewha Womans University","ror":"https://ror.org/053fp5c05","country_code":"KR","type":"education","lineage":["https://openalex.org/I138925566"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Wonhui Noh","raw_affiliation_strings":["Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0009-0002-3936-2896","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea","institution_ids":["https://openalex.org/I138925566"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013362846","display_name":"Jaehyeong Sim","orcid":"https://orcid.org/0000-0001-8722-8486"},"institutions":[{"id":"https://openalex.org/I138925566","display_name":"Ewha Womans University","ror":"https://ror.org/053fp5c05","country_code":"KR","type":"education","lineage":["https://openalex.org/I138925566"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaehyeong Sim","raw_affiliation_strings":["Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0001-8722-8486","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ewha Womans University, Seoul, South Korea","institution_ids":["https://openalex.org/I138925566"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5058572236"],"corresponding_institution_ids":["https://openalex.org/I138925566"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.74002915,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"56679","last_page":"56693"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.5475999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.5475999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.0568000003695488,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.03889999911189079,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6754999756813049},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5990999937057495},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5679000020027161},{"id":"https://openalex.org/keywords/compression-artifact","display_name":"Compression artifact","score":0.2549000084400177},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.24560000002384186}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7739999890327454},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6754999756813049},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5990999937057495},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5679000020027161},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4672999978065491},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3240000009536743},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2644999921321869},{"id":"https://openalex.org/C57654395","wikidata":"https://www.wikidata.org/wiki/Q1097775","display_name":"Compression artifact","level":5,"score":0.2549000084400177},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.24560000002384186},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.22840000689029694}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2026.3680126","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3680126","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:fb66f9ac3cbd42e68305e89de37de011","is_oa":true,"landing_page_url":"https://doaj.org/article/fb66f9ac3cbd42e68305e89de37de011","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 14, Pp 56679-56693 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3680126","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3680126","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6596503257751465,"id":"https://metadata.un.org/sdg/13","display_name":"Climate action"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"model":[2],"inference":[3],"faces":[4],"a":[5,44,62,78,98,124],"critical":[6,101],"memory":[7,21,29,262],"bottleneck":[8],"from":[9,74],"KV":[10,48],"cache,":[11],"which":[12,152,277],"grows":[13],"linearly":[14],"with":[15,270],"sequence":[16],"length":[17],"and":[18,178,191,242,253,303,311],"dominates":[19],"GPU":[20],"during":[22,182],"long-context":[23,236],"generation.":[24],"Existing":[25],"compression":[26,50,54],"methods":[27,255],"reduce":[28],"through":[30,225],"token":[31,57],"eviction":[32],"but":[33],"irreversibly":[34],"discard":[35],"attention":[36,52,89,112,146,159,217],"relationships":[37],"essential":[38],"for":[39,204],"complex":[40],"reasoning.We":[41],"present":[42],"QubitCache,":[43],"framework":[45],"that":[46,135,170,195,287],"reframes":[47],"cache":[49],"as":[51,77],"distribution":[53,81,173],"rather":[55,227],"than":[56,228],"selection.":[58],"Our":[59],"approach":[60,309],"employs":[61],"<italic":[63],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[64,199],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">quantum-inspired</i>":[65],"mathematical":[66],"formalism":[67],"(using":[68],"the":[69,136,143,171,186,196,205,243],"hierarchical":[70,120],"amplitude":[71,121],"encoding":[72],"structure":[73],"quantum":[75,85],"computing":[76],"classical":[79,109,249],"probability":[80],"compressor,":[82],"without":[83],"requiring":[84],"hardware)":[86],"to":[87,149,157,214],"represent":[88],"weight":[90],"distributions":[91,113,211],"in":[92,107],"logarithmic":[93],"parameter":[94,126],"space.":[95],"QubitCache":[96,233,258],"introduces":[97],"hybrid":[99],"architecture:":[100],"tokens":[102,116,224],"(15%":[103],"of":[104,145,166,188,222,267,273,294],"sequence)":[105],"remain":[106],"full-precision":[108],"storage,":[110],"while":[111,264],"over":[114],"remaining":[115],"are":[117,212],"compressed":[118,150,172,210,223],"using":[119],"encoding,":[122],"achieving":[123],"57\u00d7":[125],"reduction":[127,263],"per":[128],"segment.":[129],"We":[130,231],"provide":[131,192,215],"formal":[132],"analysis":[133,285,298],"showing":[134],"pipeline":[137],"output":[138],"error":[139],"is":[140,153,169,174,179,202,278],"bounded":[141],"by":[142],"fraction":[144],"mass":[147],"assigned":[148],"tokens,":[151],"small":[154],"(\u226415%)":[155],"due":[156],"inherent":[158],"sparsity":[160],"(Theorem":[161],"1).":[162],"An":[163],"important":[164],"property":[165],"our":[167],"design":[168],"fixed":[175],"after":[176],"prefill":[177],"therefore":[180],"query-agnostic":[181],"decoding;":[183],"we":[184],"analyze":[185],"implications":[187],"this":[189,308],"trade-off":[190],"empirical":[193],"evidence":[194],"resulting":[197],"\u03bb<sub":[198],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">mass</sub>":[200],"stability":[201],"sufficient":[203],"tasks":[206,241],"evaluated.":[207],"During":[208],"inference,":[209],"reconstructed":[213],"soft":[216],"weights,":[218],"maintaining":[219],"contextual":[220],"influence":[221],"probabilistic":[226],"binary":[229],"decisions.":[230],"evaluate":[232],"on":[234,275],"four":[235],"models":[237],"across":[238],"sixteen":[239],"LongBench":[240],"RULER":[244],"benchmark,":[245],"comparing":[246],"against":[247],"both":[248],"baselines":[250],"(H2O,":[251],"SnapKV)":[252],"recent":[254],"(MiniKV,":[256],"Compactor).":[257],"achieves":[259],"approximately":[260],"6.7\u00d7":[261],"retaining":[265],"91\u201399%":[266],"full-KV":[268],"performance,":[269],"an":[271],"average":[272],"74.8":[274],"RULER,":[276],"10.5":[279],"points":[280],"above":[281],"token-eviction":[282],"baselines.":[283],"Latency":[284],"confirms":[286],"per-token":[288],"decode":[289],"overhead":[290],"remains":[291],"within":[292],"2\u00d7":[293],"eviction-based":[295],"methods.":[296],"Comprehensive":[297],"including":[299],"sensitivity,":[300],"quantization":[301],"integration,":[302],"failure":[304],"cases":[305],"characterizes":[306],"when":[307],"succeeds":[310],"where":[312],"it":[313],"has":[314],"limitations.":[315]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2026-04-11T00:00:00"}
