{"id":"https://openalex.org/W4402897391","doi":"https://doi.org/10.1109/iwqos61813.2024.10682957","title":"SCALM: Towards Semantic Caching for Automated Chat Services with Large Language Models","display_name":"SCALM: Towards Semantic Caching for Automated Chat Services with Large Language Models","publication_year":2024,"publication_date":"2024-06-19","ids":{"openalex":"https://openalex.org/W4402897391","doi":"https://doi.org/10.1109/iwqos61813.2024.10682957"},"language":"en","primary_location":{"id":"doi:10.1109/iwqos61813.2024.10682957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwqos61813.2024.10682957","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/ACM 32nd International Symposium on Quality of Service (IWQoS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100636528","display_name":"Jiaxing Li","orcid":"https://orcid.org/0000-0001-7048-9284"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Jiaxing Li","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100771854","display_name":"Chi Xu","orcid":"https://orcid.org/0000-0001-7389-5763"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Chi Xu","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069404241","display_name":"Feng Wang","orcid":"https://orcid.org/0000-0002-6584-0516"},"institutions":[{"id":"https://openalex.org/I368840534","display_name":"University of Mississippi","ror":"https://ror.org/02teq1165","country_code":"US","type":"education","lineage":["https://openalex.org/I368840534","https://openalex.org/I4210141039"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feng Wang","raw_affiliation_strings":["University of Mississippi,Department of Computer and Information Science,Mississippi,USA"],"affiliations":[{"raw_affiliation_string":"University of Mississippi,Department of Computer and Information Science,Mississippi,USA","institution_ids":["https://openalex.org/I368840534"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099021824","display_name":"Isaac M von Riedemann","orcid":null},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Isaac M von Riedemann","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054518730","display_name":"Cong Zhang","orcid":"https://orcid.org/0000-0002-8952-0429"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cong Zhang","raw_affiliation_strings":["Jiangxing Intelligence Inc.,China"],"affiliations":[{"raw_affiliation_string":"Jiangxing Intelligence Inc.,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109790236","display_name":"Jiangchuan Liu","orcid":"https://orcid.org/0009-0000-5464-9970"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jiangchuan Liu","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,Canada","institution_ids":["https://openalex.org/I18014758"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100636528"],"corresponding_institution_ids":["https://openalex.org/I18014758"],"apc_list":null,"apc_paid":null,"fwci":3.1409,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.9236417,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8619059324264526},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.44579601287841797}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8619059324264526},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.44579601287841797}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwqos61813.2024.10682957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwqos61813.2024.10682957","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/ACM 32nd International Symposium on Quality of Service (IWQoS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1987225815","https://openalex.org/W1990643970","https://openalex.org/W2072853307","https://openalex.org/W2131806537","https://openalex.org/W2158703410","https://openalex.org/W2604123379","https://openalex.org/W2740924709","https://openalex.org/W2804070368","https://openalex.org/W2889402930","https://openalex.org/W3215495615","https://openalex.org/W4312933868","https://openalex.org/W4366341216","https://openalex.org/W4387869908","https://openalex.org/W4389523830","https://openalex.org/W4389523909","https://openalex.org/W4391590983","https://openalex.org/W6632074802","https://openalex.org/W6639025630","https://openalex.org/W6681515304","https://openalex.org/W6774028308","https://openalex.org/W6779109089","https://openalex.org/W6839092155","https://openalex.org/W6845281891","https://openalex.org/W6847478871","https://openalex.org/W6851687240","https://openalex.org/W6852670370","https://openalex.org/W6852772504","https://openalex.org/W6852962002","https://openalex.org/W6853192989","https://openalex.org/W6856622439"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"become":[5],"increasingly":[6],"popular,":[7],"transforming":[8],"a":[9,86,141,151],"wide":[10],"range":[11],"of":[12,21,106,144,154],"applications":[13],"across":[14],"various":[15],"domains.":[16],"However,":[17],"the":[18,36,104,107],"real-world":[19,43],"effectiveness":[20],"their":[22],"query":[23],"cache":[24,73,88,97,109,120,147],"systems":[25],"has":[26],"not":[27],"been":[28],"thoroughly":[29],"investigated.":[30],"In":[31],"this":[32],"work,":[33],"we":[34,83],"for":[35,54,127],"first":[37],"time":[38],"conducted":[39],"an":[40],"analysis":[41,93],"on":[42,139],"human-to-LLM":[44],"interaction":[45],"data,":[46],"identifying":[47],"key":[48],"challenges":[49],"in":[50,135,146,156],"existing":[51],"caching":[52,63],"solutions":[53,134],"LLM-based":[55],"chat":[56],"services.":[57,129],"Our":[58,114],"findings":[59],"reveal":[60],"that":[61,90,117],"current":[62],"methods":[64],"fail":[65],"to":[66,71],"leverage":[67],"semantic":[68,92],"connections,":[69],"leading":[70],"inefficient":[72],"performance":[74],"and":[75,94,99,111,123,150],"extra":[76],"token":[77],"costs.":[78],"To":[79],"address":[80],"these":[81],"issues,":[82],"propose":[84],"SCALM,":[85],"new":[87],"architecture":[89],"emphasizes":[91],"identifies":[95],"significant":[96],"entries":[98],"patterns.":[100],"We":[101],"also":[102],"detail":[103],"implementations":[105],"corresponding":[108],"storage":[110],"eviction":[112],"strategies.":[113],"evaluations":[115],"show":[116],"SCALM":[118,137],"increases":[119],"hit":[121,148],"ratios":[122],"reduces":[124],"operational":[125],"costs":[126],"LLMChat":[128],"Compared":[130],"with":[131],"other":[132],"state-of-the-art":[133],"GPTCache,":[136],"shows,":[138],"average,":[140],"relative":[142,152],"increase":[143],"63%":[145],"ratio":[149],"improvement":[153],"77%":[155],"tokens":[157],"savings.":[158]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-29T08:15:47.926485","created_date":"2025-10-10T00:00:00"}
