{"id":"https://openalex.org/W4405440694","doi":"https://doi.org/10.1109/icimtech63123.2024.10780864","title":"Knowledge Graph-Enhanced Semantic Cache for Low-Latency and Cost-Effective Inference in Large Language Models","display_name":"Knowledge Graph-Enhanced Semantic Cache for Low-Latency and Cost-Effective Inference in Large Language Models","publication_year":2024,"publication_date":"2024-08-28","ids":{"openalex":"https://openalex.org/W4405440694","doi":"https://doi.org/10.1109/icimtech63123.2024.10780864"},"language":"en","primary_location":{"id":"doi:10.1109/icimtech63123.2024.10780864","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icimtech63123.2024.10780864","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Information Management and Technology (ICIMTech)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016974426","display_name":"Nicholas Dominic","orcid":"https://orcid.org/0000-0003-2015-6689"},"institutions":[{"id":"https://openalex.org/I166073570","display_name":"Binus University","ror":"https://ror.org/03zmf4s77","country_code":"ID","type":"education","lineage":["https://openalex.org/I166073570"]}],"countries":["ID"],"is_corresponding":true,"raw_author_name":"Nicholas Dominic","raw_affiliation_strings":["Bina Nusantara University,Bioinformatics &#x0026; Data Science Research Center,Jakarta,Indonesia,11480"],"affiliations":[{"raw_affiliation_string":"Bina Nusantara University,Bioinformatics &#x0026; Data Science Research Center,Jakarta,Indonesia,11480","institution_ids":["https://openalex.org/I166073570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052689356","display_name":"Bens Pardamean","orcid":"https://orcid.org/0000-0002-7404-9005"},"institutions":[{"id":"https://openalex.org/I166073570","display_name":"Binus University","ror":"https://ror.org/03zmf4s77","country_code":"ID","type":"education","lineage":["https://openalex.org/I166073570"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Bens Pardamean","raw_affiliation_strings":["BINUS Graduate Program - Master of Computer Science, Bina Nusantara University,Computer Science Department,Jakarta,Indonesia,11480"],"affiliations":[{"raw_affiliation_string":"BINUS Graduate Program - Master of Computer Science, Bina Nusantara University,Computer Science Department,Jakarta,Indonesia,11480","institution_ids":["https://openalex.org/I166073570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5016974426"],"corresponding_institution_ids":["https://openalex.org/I166073570"],"apc_list":null,"apc_paid":null,"fwci":1.4504,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.85733835,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"340","last_page":"344"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9643999934196472,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9643999934196472,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.911899983882904,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8332130312919617},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6209657788276672},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4989187717437744},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4642162322998047},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.46059736609458923},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4220244288444519},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3584100604057312},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.269824743270874},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.22403821349143982}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8332130312919617},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6209657788276672},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4989187717437744},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4642162322998047},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.46059736609458923},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4220244288444519},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3584100604057312},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.269824743270874},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.22403821349143982},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icimtech63123.2024.10780864","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icimtech63123.2024.10780864","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Information Management and Technology (ICIMTech)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W3035403290","https://openalex.org/W3092074660","https://openalex.org/W3133224387","https://openalex.org/W3207351174","https://openalex.org/W4312515330","https://openalex.org/W4313121056","https://openalex.org/W4313138132","https://openalex.org/W4319997238","https://openalex.org/W4324095392","https://openalex.org/W4352978531","https://openalex.org/W4378387269","https://openalex.org/W4383377279","https://openalex.org/W4388996646","https://openalex.org/W4388996684","https://openalex.org/W4389209096","https://openalex.org/W4389523830","https://openalex.org/W4389523909","https://openalex.org/W4389615177","https://openalex.org/W6850927664","https://openalex.org/W6853495990"],"related_works":["https://openalex.org/W2152099439","https://openalex.org/W2055243143","https://openalex.org/W1984163603","https://openalex.org/W3130422087","https://openalex.org/W3004195166","https://openalex.org/W2126310295","https://openalex.org/W1563139915","https://openalex.org/W4288413100","https://openalex.org/W3128807919","https://openalex.org/W3176411177"],"abstract_inverted_index":{"In":[0,125],"organizational":[1],"knowledge":[2,91,156],"management,":[3],"Large":[4],"Language":[5],"Model":[6],"(LLM)":[7],"caches":[8],"act":[9],"as":[10,59,86],"a":[11,53,87,90,122],"semantic":[12,136],"repository":[13],"gathered":[14],"from":[15,23,29,96,107],"previous":[16],"LLM":[17,26,155,193],"responses.":[18],"Due":[19],"to":[20,40,64,80,118],"intensive":[21],"calls":[22],"multiple":[24],"users,":[25],"may":[27],"suffer":[28],"high":[30],"inference":[31],"latency.":[32],"While":[33],"there":[34],"are":[35,47],"many":[36],"prior":[37],"available":[38],"approaches":[39],"solve":[41],"this":[42,145,162],"problem,":[43],"most":[44],"of":[45,102,144,150],"them":[46],"inherently":[48],"complex.":[49],"This":[50,111],"paper":[51],"introduced":[52],"Knowledge":[54],"Graph-enhanced":[55],"Semantic":[56],"Cache":[57],"mechanism":[58],"an":[60,148],"alternative,":[61],"lightweight":[62],"technique":[63],"boost":[65],"retrieval":[66],"for":[67,154],"similar":[68],"prompts.":[69],"The":[70,164],"latest":[71],"state-of-the-art":[72],"open-source":[73],"LLM,":[74],"named":[75],"Google's":[76],"Gemma-2B-it,":[77],"was":[78,94,109,158],"used":[79],"generate":[81],"sample":[82],"prompts":[83,103],"and":[84,104,177,191],"responses":[85],"draft,":[88],"while":[89],"graph":[92],"(KG)":[93],"built":[95],"Wikipedia":[97],"sentences.":[98],"To":[99,139],"create":[100],"embeddings":[101,131],"KG,":[105],"all-MiniLM-L6-v2":[106],"SentenceTransformer":[108],"used.":[110],"new":[112,168],"cache":[113,130,137,171],"system":[114,172],"resulted":[115],"in":[116,161,188],"up":[117],"28%":[119],"improvement":[120],"over":[121],"standard":[123],"model.":[124],"particular,":[126],"reinforcement":[127],"with":[128,174],"KG":[129],"yielded":[132],"more":[133],"than":[134],"85%":[135],"accuracy.":[138],"map":[140],"the":[141,151,167],"next":[142],"trajectory":[143],"pilot":[146],"study,":[147],"overview":[149],"extended":[152],"framework":[153,165],"management":[157],"also":[159],"presented":[160],"paper.":[163],"includes":[166],"KG-":[169],"enhanced":[170],"equipped":[173],"scalable":[175],"security":[176],"fallback":[178],"mechanisms":[179],"that":[180],"can":[181],"promote":[182],"green":[183],"technology":[184],"through":[185],"substantial":[186],"improvements":[187],"latency,":[189],"throughput,":[190],"overall":[192],"costs.":[194]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
