{"id":"https://openalex.org/W7138927244","doi":"https://doi.org/10.1109/globecom59602.2025.11432531","title":"Efficient Batch Processing for Private Cloud LLM Inference: Modeling and Performance Comparison","display_name":"Efficient Batch Processing for Private Cloud LLM Inference: Modeling and Performance Comparison","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W7138927244","doi":"https://doi.org/10.1109/globecom59602.2025.11432531"},"language":null,"primary_location":{"id":"doi:10.1109/globecom59602.2025.11432531","is_oa":false,"landing_page_url":"https://doi.org/10.1109/globecom59602.2025.11432531","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"GLOBECOM 2025 - 2025 IEEE Global Communications Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109515164","display_name":"Hiroki Nakai","orcid":null},"institutions":[{"id":"https://openalex.org/I14314212","display_name":"Osaka University of Economics","ror":"https://ror.org/04g11bp59","country_code":"JP","type":"education","lineage":["https://openalex.org/I14314212"]},{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hiroki Nakai","raw_affiliation_strings":["The University of Osaka,Graduate School of Engineering,Osaka,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Osaka,Graduate School of Engineering,Osaka,Japan","institution_ids":["https://openalex.org/I14314212","https://openalex.org/I98285908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061344969","display_name":"Yoshiaki Inoue","orcid":"https://orcid.org/0000-0002-2483-7652"},"institutions":[{"id":"https://openalex.org/I14314212","display_name":"Osaka University of Economics","ror":"https://ror.org/04g11bp59","country_code":"JP","type":"education","lineage":["https://openalex.org/I14314212"]},{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshiaki Inoue","raw_affiliation_strings":["The University of Osaka,Graduate School of Engineering,Osaka,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Osaka,Graduate School of Engineering,Osaka,Japan","institution_ids":["https://openalex.org/I14314212","https://openalex.org/I98285908"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081487170","display_name":"Tetsuya Takine","orcid":"https://orcid.org/0000-0001-7499-6929"},"institutions":[{"id":"https://openalex.org/I14314212","display_name":"Osaka University of Economics","ror":"https://ror.org/04g11bp59","country_code":"JP","type":"education","lineage":["https://openalex.org/I14314212"]},{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tetsuya Takine","raw_affiliation_strings":["The University of Osaka,Graduate School of Engineering,Osaka,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Osaka,Graduate School of Engineering,Osaka,Japan","institution_ids":["https://openalex.org/I14314212","https://openalex.org/I98285908"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5109515164"],"corresponding_institution_ids":["https://openalex.org/I14314212","https://openalex.org/I98285908"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.8849911,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5689","last_page":"5694"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.3476000130176544,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.3476000130176544,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.05090000107884407,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.03999999910593033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7806000113487244},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5613999962806702},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4596000015735626},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.43959999084472656},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.4327999949455261},{"id":"https://openalex.org/keywords/capital-expenditure","display_name":"Capital expenditure","score":0.37389999628067017},{"id":"https://openalex.org/keywords/batch-processing","display_name":"Batch processing","score":0.3310000002384186}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7806000113487244},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7257999777793884},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5613999962806702},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4596000015735626},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.43959999084472656},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.4327999949455261},{"id":"https://openalex.org/C172497479","wikidata":"https://www.wikidata.org/wiki/Q302208","display_name":"Capital expenditure","level":2,"score":0.37389999628067017},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33970001339912415},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33799999952316284},{"id":"https://openalex.org/C172658912","wikidata":"https://www.wikidata.org/wiki/Q661613","display_name":"Batch processing","level":2,"score":0.3310000002384186},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32499998807907104},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.31360000371932983},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3034999966621399},{"id":"https://openalex.org/C99221444","wikidata":"https://www.wikidata.org/wiki/Q1532069","display_name":"Private information retrieval","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2581999897956848},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.2578999996185303}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/globecom59602.2025.11432531","is_oa":false,"landing_page_url":"https://doi.org/10.1109/globecom59602.2025.11432531","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"GLOBECOM 2025 - 2025 IEEE Global Communications Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W4206149382","https://openalex.org/W4225603018","https://openalex.org/W4317935380","https://openalex.org/W4320005538","https://openalex.org/W4385245566","https://openalex.org/W4387321091","https://openalex.org/W4388874804","https://openalex.org/W4390660333","https://openalex.org/W4394998727","https://openalex.org/W4400770592","https://openalex.org/W4406981251"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2],"artificial":[3],"intelligence":[4],"(AI)":[5],"have":[6],"yielded":[7],"sophisticated":[8],"natural":[9],"language":[10,17],"interaction":[11],"systems,":[12],"largely":[13],"driven":[14],"by":[15,25,113],"Large":[16],"models":[18,21],"(LLMs).":[19],"These":[20],"generate":[22],"text":[23],"auto-regressively":[24],"predicting":[26],"continuations.":[27],"However,":[28],"the":[29,129,137],"substantial":[30,78],"computational":[31],"demands":[32],"of":[33,96,139],"LLM":[34,64,97,117],"inference":[35,65,104,118,141],"typically":[36],"necessitate":[37],"reliance":[38],"on":[39,71,103,128,146],"cloud-based":[40,116],"services.":[41],"As":[42],"demand":[43],"escalates,":[44],"organizations":[45],"are":[46],"increasingly":[47],"exploring":[48],"private":[49,63,115],"cloud":[50],"deployments":[51],"to":[52],"bolster":[53],"information":[54],"security":[55],"and":[56,77,131,134,143],"comply":[57],"with":[58],"data":[59],"regulations.":[60],"Nevertheless,":[61],"deploying":[62],"servers":[66],"entails":[67],"significant":[68],"capital":[69],"expenditure":[70],"high-performance":[72],"Graphics":[73],"Processing":[74],"Units":[75],"(GPUs)":[76],"operational":[79],"costs,":[80],"including":[81],"high":[82],"power":[83],"consumption,":[84],"raising":[85],"environmental":[86],"concerns.":[87],"While":[88],"existing":[89],"research":[90],"has":[91],"explored":[92],"optimizing":[93],"individual":[94],"components":[95],"processing,":[98],"a":[99,120],"holistic,":[100],"system-level":[101],"perspective":[102],"efficiency":[105],"remains":[106],"underexplored.":[107],"This":[108],"paper":[109],"addresses":[110],"this":[111],"gap":[112],"formulating":[114],"as":[119],"stochastic":[121],"model.":[122],"We":[123],"analyze":[124],"its":[125],"performance,":[126],"focusing":[127],"throughput":[130],"processing":[132],"latency,":[133],"specifically":[135],"investigate":[136],"impact":[138],"different":[140],"methods":[142],"batch":[144],"sizes":[145],"overall":[147],"efficiency.":[148]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
