{"id":"https://openalex.org/W7133678156","doi":"https://doi.org/10.48550/arxiv.2603.03301","title":"From Exact Hits to Close Enough: Semantic Caching for LLM Embeddings","display_name":"From Exact Hits to Close Enough: Semantic Caching for LLM Embeddings","publication_year":2026,"publication_date":"2026-02-07","ids":{"openalex":"https://openalex.org/W7133678156","doi":"https://doi.org/10.48550/arxiv.2603.03301"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.03301","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5117130057","display_name":"Dvir David Biton","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Biton, Dvir David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128196077","display_name":"Roy Friedman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Friedman, Roy","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5117130057"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.2071000039577484,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.2071000039577484,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.16189999878406525,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.059700001031160355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6521999835968018},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.626800000667572},{"id":"https://openalex.org/keywords/semantic-computing","display_name":"Semantic computing","score":0.4968999922275543},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.45010000467300415},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.4323999881744385},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.41830000281333923},{"id":"https://openalex.org/keywords/semantic-grid","display_name":"Semantic grid","score":0.4113999903202057}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8442000150680542},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6521999835968018},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.626800000667572},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.4968999922275543},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.45010000467300415},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.4323999881744385},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.41830000281333923},{"id":"https://openalex.org/C103692084","wikidata":"https://www.wikidata.org/wiki/Q1765824","display_name":"Semantic grid","level":3,"score":0.4113999903202057},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.41119998693466187},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36320000886917114},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3452000021934509},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3165000081062317},{"id":"https://openalex.org/C6881194","wikidata":"https://www.wikidata.org/wiki/Q7449091","display_name":"Semantic technology","level":4,"score":0.30979999899864197},{"id":"https://openalex.org/C198942812","wikidata":"https://www.wikidata.org/wiki/Q496618","display_name":"Semantic property","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.28859999775886536},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28619998693466187},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2777000069618225},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.27619999647140503},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.27390000224113464},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.25189998745918274}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.03301","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.03301","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.03301","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.03301","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.6511269211769104}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"rapid":[1],"adoption":[2],"of":[3],"large":[4],"language":[5],"models":[6],"(LLMs)":[7],"has":[8],"created":[9],"demand":[10],"for":[11,45,100,107],"faster":[12],"responses":[13],"and":[14,34,57,74,103],"lower":[15],"costs.":[16],"Semantic":[17],"caching,":[18,47],"reusing":[19],"semantically":[20],"similar":[21],"requests":[22],"via":[23],"their":[24],"embeddings,":[25],"addresses":[26],"this":[27,39],"need":[28],"but":[29],"breaks":[30],"classic":[31],"cache":[32,68],"assumptions":[33],"raises":[35],"new":[36],"challenges.":[37],"In":[38],"paper,":[40],"we":[41],"explore":[42],"offline":[43,53],"policies":[44,69,85],"semantic":[46,66,93],"proving":[48],"that":[49,70,81],"implementing":[50],"an":[51],"optimal":[52],"policy":[54],"is":[55,112],"NP-hard,":[56],"propose":[58],"several":[59],"polynomial-time":[60],"heuristics.":[61],"We":[62],"also":[63],"present":[64],"online":[65],"aware":[67],"combine":[71],"recency,":[72],"frequency,":[73],"locality.":[75],"Evaluations":[76],"on":[77],"diverse":[78],"datasets":[79],"show":[80],"while":[82],"frequency":[83],"based":[84],"are":[86],"strong":[87],"baselines,":[88],"our":[89],"novel":[90],"variant":[91],"improves":[92],"accuracy.":[94],"Our":[95],"findings":[96],"reveal":[97],"effective":[98],"strategies":[99],"current":[101],"systems":[102],"highlight":[104],"substantial":[105],"headroom":[106],"future":[108],"innovation.":[109],"All":[110],"code":[111],"open":[113],"source.":[114]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-06T00:00:00"}
