{"id":"https://openalex.org/W7130535954","doi":"https://doi.org/10.1109/fllm67465.2025.11391052","title":"Performance and Cost Optimization of Federated LLM Agents in Edge Computing Environments","display_name":"Performance and Cost Optimization of Federated LLM Agents in Edge Computing Environments","publication_year":2025,"publication_date":"2025-11-25","ids":{"openalex":"https://openalex.org/W7130535954","doi":"https://doi.org/10.1109/fllm67465.2025.11391052"},"language":null,"primary_location":{"id":"doi:10.1109/fllm67465.2025.11391052","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fllm67465.2025.11391052","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 3rd International Conference on Foundation and Large Language Models (FLLM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126373500","display_name":"Goutam Tadi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136460","display_name":"Astronomical Institute of the Slovak Academy of Sciences","ror":"https://ror.org/03t6cz096","country_code":"SK","type":"facility","lineage":["https://openalex.org/I207624831","https://openalex.org/I4210136460"]}],"countries":["SK"],"is_corresponding":false,"raw_author_name":"Goutam Tadi","raw_affiliation_strings":["Astronomer,Platform Engineering,Leander,TX,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Astronomer,Platform Engineering,Leander,TX,USA","institution_ids":["https://openalex.org/I4210136460"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102640240","display_name":"Akshay Mittal","orcid":"https://orcid.org/0009-0008-5233-9248"},"institutions":[{"id":"https://openalex.org/I276309446","display_name":"University of the Cumberlands","ror":"https://ror.org/05jz3sn81","country_code":"US","type":"education","lineage":["https://openalex.org/I276309446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Akshay Mittal","raw_affiliation_strings":["University of the Cumberlands,Austin,TX,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of the Cumberlands,Austin,TX,USA","institution_ids":["https://openalex.org/I276309446"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.68537096,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1267","last_page":"1274"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.31869998574256897,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.31869998574256897,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.13189999759197235,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.04089999943971634,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.666100025177002},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.6021999716758728},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5866000056266785},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5687999725341797},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.5041999816894531},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4975999891757965},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.3605000078678131},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.33959999680519104}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.791100025177002},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.666100025177002},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6524999737739563},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.6021999716758728},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5866000056266785},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5687999725341797},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.5041999816894531},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4975999891757965},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.3605000078678131},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.33959999680519104},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3109000027179718},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.30660000443458557},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.2815000116825104},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C25344961","wikidata":"https://www.wikidata.org/wiki/Q192726","display_name":"Virtual machine","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fllm67465.2025.11391052","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fllm67465.2025.11391052","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 3rd International Conference on Foundation and Large Language Models (FLLM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2416799949","https://openalex.org/W2535690855","https://openalex.org/W2783538964","https://openalex.org/W2785696180","https://openalex.org/W2950865323","https://openalex.org/W2977090839","https://openalex.org/W3002291797","https://openalex.org/W3021654819","https://openalex.org/W3035905455","https://openalex.org/W3133913697","https://openalex.org/W4200495212","https://openalex.org/W4285815179","https://openalex.org/W4380607150","https://openalex.org/W4387321091","https://openalex.org/W4402568979","https://openalex.org/W4403674467","https://openalex.org/W4406650295","https://openalex.org/W4413679871"],"related_works":[],"abstract_inverted_index":{"The":[0,194],"deployment":[1,203],"of":[2,34,102,111,138],"Large":[3],"Language":[4],"Model":[5],"(LLM)":[6],"agents":[7],"is":[8,29],"shifting":[9],"from":[10],"centralized":[11],"cloud":[12],"infrastructures":[13],"to":[14,19,65,98],"decentralized":[15],"edge":[16,35,66,103,127,192],"computing":[17],"environments":[18],"achieve":[20],"reduced":[21],"latency":[22],"and":[23,51,73,148,165,175,182,219],"enhanced":[24],"data":[25,85],"privacy.":[26],"This":[27,87],"transition":[28],"driven":[30],"by":[31,92,235],"the":[32,79,99,136],"emergence":[33],"AI":[36,53],"applications":[37],"such":[38,158],"as":[39,159,233],"autonomous":[40],"vehicle":[41],"command":[42],"recognition,":[43],"industrial":[44],"process":[45],"monitoring":[46],"with":[47],"natural":[48],"language":[49],"interfaces,":[50],"conversational":[52],"on":[54,117,154],"resource-constrained":[55],"IoT":[56],"devices.":[57],"However,":[58],"this":[59],"shift":[60],"introduces":[61],"significant":[62],"challenges":[63,91],"due":[64],"devices\u2019":[67],"limited":[68],"computational":[69],"resources,":[70],"storage":[71,174],"capacity,":[72],"network":[74,209],"bandwidth,":[75],"which":[76],"severely":[77],"magnify":[78],"performance":[80,156],"bottlenecks":[81],"observed":[82],"in":[83,208,215,223],"well-resourced":[84],"centers.":[86],"paper":[88],"addresses":[89],"these":[90],"extending":[93],"cloud-based":[94],"LLM":[95,113],"optimization":[96,140,196],"techniques":[97],"unique":[100],"constraints":[101],"computing.":[104],"We":[105],"present":[106],"a":[107,118,124,130,205,212,220],"comprehensive":[108],"experimental":[109],"analysis":[110],"federated":[112],"agent":[114],"workloads":[115],"deployed":[116],"Kubernetes":[119],"(K3s)":[120],"cluster":[121],"that":[122,171],"simulates":[123],"realistic":[125],"heterogeneous":[126],"environment.":[128],"Through":[129],"systematic":[131],"ablation":[132],"study,":[133],"we":[134],"evaluate":[135],"effectiveness":[137],"multiple":[139],"strategies,":[141],"including":[142],"advanced":[143],"model":[144,162,184,231],"quantization,":[145],"distributed":[146],"caching,":[147],"network-aware":[149,180],"orchestration,":[150],"measuring":[151],"their":[152],"impact":[153],"critical":[155],"metrics":[157],"time-to-first-token":[160],"(TTFT),":[161],"synchronization":[163],"overhead,":[164],"energy":[166,224],"consumption.":[167],"Our":[168],"results":[169],"demonstrate":[170],"while":[172,228],"traditional":[173],"memory":[176],"optimizations":[177],"remain":[178],"important,":[179],"scheduling":[181],"adaptive":[183],"loading":[185],"strategies":[186],"are":[187],"crucial":[188],"for":[189],"achieving":[190],"practical":[191],"deployment.":[193],"proposed":[195],"framework":[197],"delivers":[198],"substantial":[199],"improvements":[200],"over":[201],"naive":[202],"approaches:":[204],"75%":[206],"reduction":[207],"bandwidth":[210],"consumption,":[211],"45%":[213],"improvement":[214],"end-to-end":[216],"inference":[217],"latency,":[218],"61%":[221],"decrease":[222],"usage":[225],"per":[226],"inference,":[227],"maintaining":[229],"acceptable":[230],"quality":[232],"measured":[234],"perplexity.":[236]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-20T00:00:00"}
