{"id":"https://openalex.org/W4414538550","doi":"https://doi.org/10.1109/icc52391.2025.11161094","title":"Joint Caching and Inference for Large Language Models in Wireless Networks","display_name":"Joint Caching and Inference for Large Language Models in Wireless Networks","publication_year":2025,"publication_date":"2025-06-08","ids":{"openalex":"https://openalex.org/W4414538550","doi":"https://doi.org/10.1109/icc52391.2025.11161094"},"language":"en","primary_location":{"id":"doi:10.1109/icc52391.2025.11161094","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icc52391.2025.11161094","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICC 2025 - IEEE International Conference on Communications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114804304","display_name":"Bingjie Zhu","orcid":"https://orcid.org/0009-0003-6232-6057"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bingjie Zhu","raw_affiliation_strings":["Xidian University,The State Key Laboratory of Integrated Services Networks,Xi&#x0027;an,China,710071"],"affiliations":[{"raw_affiliation_string":"Xidian University,The State Key Laboratory of Integrated Services Networks,Xi&#x0027;an,China,710071","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090876459","display_name":"Zhixiong Chen","orcid":"https://orcid.org/0000-0003-4228-0023"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhixiong Chen","raw_affiliation_strings":["Queen Mary University of London,London,U.K"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,London,U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014340318","display_name":"Liqiang Zhao","orcid":"https://orcid.org/0000-0002-3374-6066"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqiang Zhao","raw_affiliation_strings":["Xidian University,The State Key Laboratory of Integrated Services Networks,Xi&#x0027;an,China,710071"],"affiliations":[{"raw_affiliation_string":"Xidian University,The State Key Laboratory of Integrated Services Networks,Xi&#x0027;an,China,710071","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007557286","display_name":"Hyundong Shin","orcid":"https://orcid.org/0000-0003-3364-8084"},"institutions":[{"id":"https://openalex.org/I35928602","display_name":"Kyung Hee University","ror":"https://ror.org/01zqcg218","country_code":"KR","type":"education","lineage":["https://openalex.org/I35928602"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyundong Shin","raw_affiliation_strings":["Kyung Hee University, Yongin-si,Gyeonggido,Republic of Korea,17104"],"affiliations":[{"raw_affiliation_string":"Kyung Hee University, Yongin-si,Gyeonggido,Republic of Korea,17104","institution_ids":["https://openalex.org/I35928602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002265731","display_name":"Arumugam Nallanathan","orcid":"https://orcid.org/0000-0001-8337-5884"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Arumugam Nallanathan","raw_affiliation_strings":["Queen Mary University of London,London,U.K"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,London,U.K","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5114804304"],"corresponding_institution_ids":["https://openalex.org/I149594827"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32343712,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6285","last_page":"6290"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10651","display_name":"IPv6, Mobility, Handover, Networks, Security","score":0.9754999876022339,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9539999961853027,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.684499979019165},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5878999829292297},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.48559999465942383},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.45350000262260437},{"id":"https://openalex.org/keywords/wireless-network","display_name":"Wireless network","score":0.4415999948978424},{"id":"https://openalex.org/keywords/linear-network-coding","display_name":"Linear network coding","score":0.42750000953674316},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.41780000925064087},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.40950000286102295},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.4043000042438507}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8511000275611877},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.684499979019165},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5878999829292297},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.48559999465942383},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.45350000262260437},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.45190000534057617},{"id":"https://openalex.org/C108037233","wikidata":"https://www.wikidata.org/wiki/Q11375","display_name":"Wireless network","level":3,"score":0.4415999948978424},{"id":"https://openalex.org/C138293262","wikidata":"https://www.wikidata.org/wiki/Q1089578","display_name":"Linear network coding","level":3,"score":0.42750000953674316},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41780000925064087},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.40950000286102295},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.4043000042438507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.391400009393692},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.3756999969482422},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.3691999912261963},{"id":"https://openalex.org/C68649174","wikidata":"https://www.wikidata.org/wiki/Q1379116","display_name":"Base station","level":2,"score":0.36329999566078186},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.3625999987125397},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.35659998655319214},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.3490000069141388},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.3481999933719635},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.3222000002861023},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3181000053882599},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3118000030517578},{"id":"https://openalex.org/C2781041963","wikidata":"https://www.wikidata.org/wiki/Q18348618","display_name":"Computation offloading","level":4,"score":0.3082999885082245},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3034000098705292},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29030001163482666},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.27320000529289246},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.257999986410141}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icc52391.2025.11161094","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icc52391.2025.11161094","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICC 2025 - IEEE International Conference on Communications","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W3009562139","https://openalex.org/W4250589301","https://openalex.org/W4292348059","https://openalex.org/W4389252700","https://openalex.org/W4391250505","https://openalex.org/W4400447774","https://openalex.org/W4406207083"],"related_works":[],"abstract_inverted_index":{"To":[0],"reduce":[1],"the":[2,11,24,53,89,93,118,140,156,162,170],"serving":[3,14,73],"delay":[4,178],"of":[5],"large":[6,163],"language":[7],"model":[8],"(LLM)-based":[9],"applications,":[10],"edge-based":[12,40],"LLM":[13,28,41,59,72,77,83,120,125],"mechanism":[15,138],"offers":[16],"a":[17,57,131],"promising":[18],"solution":[19,91],"by":[20,35,100],"caching":[21,42,84,121],"LLMs":[22,153],"at":[23],"edge":[25],"to":[26,32,46,70,147,161],"provide":[27],"inference":[29,44,61],"services":[30],"closer":[31],"users.":[33],"Motivated":[34],"this,":[36],"we":[37,55,86,106],"propose":[38,107],"an":[39,82,108],"and":[43,64,97,134,154,179],"framework":[45],"support":[47],"low-delay":[48],"LLM-based":[49],"services.":[50],"Based":[51],"on":[52],"framework,":[54],"formulate":[56],"joint":[58],"caching,":[60],"task":[62,98],"scheduling,":[63],"computation":[65,94],"resource":[66,95],"allocation":[67,96],"optimization":[68,103],"problem":[69],"minimize":[71],"delay,":[74],"where":[75],"time-varying":[76],"popularity":[78],"is":[79],"considered.":[80],"Given":[81],"policy,":[85],"first":[87],"obtain":[88],"optimal":[90,119],"for":[92,152],"scheduling":[99],"using":[101],"traditional":[102],"methods.":[104],"Then,":[105],"improved":[109],"double":[110],"deep":[111,141],"Q-network":[112],"(IDDQN)":[113],"algorithm":[114,129],"that":[115,169],"effectively":[116],"learns":[117],"strategy":[122],"under":[123],"unknown":[124],"popularity.":[126],"The":[127],"IDDQN":[128],"integrates":[130],"state":[132],"coding":[133],"action":[135,164],"aggregation":[136],"(SCAA)":[137],"in":[139],"neural":[142],"network":[143],"structure,":[144],"enabling":[145],"it":[146],"efficiently":[148],"capture":[149],"users'":[150],"preferences":[151],"mitigate":[155],"slow":[157],"convergence":[158,181],"issues":[159],"due":[160],"space.":[165],"Simulation":[166],"results":[167],"indicate":[168],"proposed":[171],"scheme":[172],"achieves":[173],"both":[174],"lower":[175],"average":[176],"user":[177],"faster":[180],"than":[182],"other":[183],"benchmarks.":[184]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
