{"id":"https://openalex.org/W4411799757","doi":"https://doi.org/10.1109/icmcis64378.2025.11048102","title":"Exploring Shared Large Language Models: Early Insights into Scalability and Efficiency in AI Assistant and Agent Deployment","display_name":"Exploring Shared Large Language Models: Early Insights into Scalability and Efficiency in AI Assistant and Agent Deployment","publication_year":2025,"publication_date":"2025-05-13","ids":{"openalex":"https://openalex.org/W4411799757","doi":"https://doi.org/10.1109/icmcis64378.2025.11048102"},"language":"en","primary_location":{"id":"doi:10.1109/icmcis64378.2025.11048102","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icmcis64378.2025.11048102","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Military Communication and Information Systems (ICMCIS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026364979","display_name":"Arvid Kok","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Arvid Kok","raw_affiliation_strings":["NATO Communications and Information Agency,Data Science &#x0026; AI, CTO,The Hague,The Netherlands"],"affiliations":[{"raw_affiliation_string":"NATO Communications and Information Agency,Data Science &#x0026; AI, CTO,The Hague,The Netherlands","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044375620","display_name":"Antonio Carvalho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Antonio Carvalho","raw_affiliation_strings":["NATO Communications and Information Agency,Data Science &#x0026; AI, CTO,The Hague,The Netherlands"],"affiliations":[{"raw_affiliation_string":"NATO Communications and Information Agency,Data Science &#x0026; AI, CTO,The Hague,The Netherlands","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051306835","display_name":"Michael Street","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michael Street","raw_affiliation_strings":["NATO Communications and Information Agency,Data Science &#x0026; AI, CTO,The Hague,The Netherlands"],"affiliations":[{"raw_affiliation_string":"NATO Communications and Information Agency,Data Science &#x0026; AI, CTO,The Hague,The Netherlands","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5026364979"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08326577,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9836000204086304,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.9728999733924866,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.7931212782859802},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7914628982543945},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7665433883666992},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3806445002555847},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.37483784556388855},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.31380343437194824},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14516681432724}],"concepts":[{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.7931212782859802},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7914628982543945},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7665433883666992},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3806445002555847},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.37483784556388855},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.31380343437194824},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14516681432724}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icmcis64378.2025.11048102","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icmcis64378.2025.11048102","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Military Communication and Information Systems (ICMCIS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2980282514","https://openalex.org/W4321593177","https://openalex.org/W4400410487","https://openalex.org/W4401202443","https://openalex.org/W4403577155","https://openalex.org/W4408029577"],"related_works":["https://openalex.org/W2770234245","https://openalex.org/W96612179","https://openalex.org/W4229499248","https://openalex.org/W1982914007","https://openalex.org/W2159583675","https://openalex.org/W1824242903","https://openalex.org/W1493858311","https://openalex.org/W2155470929","https://openalex.org/W2111125783","https://openalex.org/W2394465510"],"abstract_inverted_index":{"The":[0,182],"deployment":[1],"of":[2,31,161,201],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"is":[7],"rapidly":[8],"expanding":[9],"across":[10,34],"diverse":[11],"applications,":[12,36],"necessitating":[13],"cost-effective":[14],"and":[15,28,46,70,77,102,120,139,147,152,167,214],"resource-efficient":[16],"strategies":[17],"to":[18,57,97,136],"optimize":[19],"their":[20],"usage.":[21],"This":[22,155,205],"paper":[23,206],"investigates":[24],"the":[25,63,158,196,211,221],"scalability,":[26,115],"efficiency,":[27,90],"performance":[29],"trade-offs":[30],"sharing":[32],"LLMs":[33,163],"multiple":[35],"addressing":[37],"critical":[38],"challenges":[39],"such":[40,179],"as":[41,180],"GPU":[42,78,134],"limitations,":[43],"concurrency":[44,71,92],"management,":[45],"latency":[47,124,149],"optimization.":[48],"Using":[49],"three":[50],"experimental":[51],"setups":[52],"ranging":[53],"from":[54],"consumer-grade":[55],"GPUs":[56],"high-performance":[58],"cloud":[59],"infrastructure,":[60],"we":[61],"examine":[62],"interplay":[64],"between":[65],"prompt":[66],"size,":[67,69],"model":[68],"on":[72,114],"metrics":[73],"like":[74],"latency,":[75],"throughput,":[76],"utilization.":[79],"Our":[80],"findings":[81],"reveal":[82],"that":[83],"shared":[84,162,189],"LLM":[85],"architectures":[86],"significantly":[87],"enhance":[88],"resource":[89],"with":[91,127,176],"improving":[93],"throughput":[94],"by":[95,220],"2x":[96],"4x":[98],"for":[99,105,117,144,170,198],"longer":[100],"prompts":[101],"over":[103],"20x":[104],"shorter":[106],"batched":[107],"prompts.":[108],"However,":[109],"memory":[110,138],"constraints":[111],"impose":[112],"limitations":[113],"particularly":[116,173],"large":[118],"models":[119],"extended":[121],"prompts,":[122],"where":[123],"increases":[125],"linearly":[126],"context":[128],"length.":[129],"Practical":[130],"recommendations":[131],"include":[132],"tailoring":[133],"configurations":[135],"balance":[137],"compute":[140],"demands,":[141],"leveraging":[142],"batching":[143],"optimal":[145],"utilization,":[146],"mitigating":[148],"through":[150],"caching":[151],"load":[153],"balancing.":[154],"study":[156],"underscores":[157],"strategic":[159],"value":[160],"in":[164,174,230],"reducing":[165],"costs":[166],"enhancing":[168],"scalability":[169],"multi-application":[171],"scenarios,":[172],"domains":[175],"constrained":[177],"resources,":[178],"defense.":[181],"results":[183],"provide":[184],"actionable":[185],"insights":[186],"into":[187],"deploying":[188],"generative":[190],"AI":[191],"systems":[192],"efficiently":[193],"while":[194],"paving":[195],"way":[197],"future":[199],"exploration":[200],"advanced":[202],"optimization":[203],"techniques.":[204],"was":[207],"originally":[208],"presented":[209],"at":[210],"NATO":[212],"Science":[213],"Technology":[215,224],"Organization":[216],"Symposium":[217],"(ICMCIS)":[218],"organized":[219],"Information":[222],"Systems":[223],"(IST)":[225],"Panel,":[226],"IST-209-RSY-the":[227],"ICMCIS,":[228],"held":[229],"Oeiras,":[231],"Portugal,":[232],"13\u201314":[233],"May":[234],"2025.":[235]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
