{"id":"https://openalex.org/W4414196925","doi":"https://doi.org/10.26599/bdma.2025.9020025","title":"Dynamic Batch Processing with FlexiDecode Scheduler for Efficient LLM Inference in IIoT","display_name":"Dynamic Batch Processing with FlexiDecode Scheduler for Efficient LLM Inference in IIoT","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4414196925","doi":"https://doi.org/10.26599/bdma.2025.9020025"},"language":"en","primary_location":{"id":"doi:10.26599/bdma.2025.9020025","is_oa":true,"landing_page_url":"https://doi.org/10.26599/bdma.2025.9020025","pdf_url":null,"source":{"id":"https://openalex.org/S4210209060","display_name":"Big Data Mining and Analytics","issn_l":"2096-0654","issn":["2096-0654"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311901","host_organization_name":"Tsinghua University Press","host_organization_lineage":["https://openalex.org/P4310311901"],"host_organization_lineage_names":["Tsinghua University Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data Mining and Analytics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.26599/bdma.2025.9020025","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030718716","display_name":"Xiaocong Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaocong Jia","raw_affiliation_strings":["Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000","institution_ids":["https://openalex.org/I152269853"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012116011","display_name":"Bruce Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bruce Gu","raw_affiliation_strings":["Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000","institution_ids":["https://openalex.org/I152269853"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101808353","display_name":"Jinjun Chen","orcid":"https://orcid.org/0000-0003-1677-9525"},"institutions":[{"id":"https://openalex.org/I57093077","display_name":"Swinburne University of Technology","ror":"https://ror.org/031rekg67","country_code":"AU","type":"education","lineage":["https://openalex.org/I57093077"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jinjun Chen","raw_affiliation_strings":["Swinburne University of Technology,Department of Computing Technologies,Melbourne,Australia,3000"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Swinburne University of Technology,Department of Computing Technologies,Melbourne,Australia,3000","institution_ids":["https://openalex.org/I57093077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012293028","display_name":"Longxiang Gao","orcid":"https://orcid.org/0000-0002-3026-7537"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longxiang Gao","raw_affiliation_strings":["Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000","institution_ids":["https://openalex.org/I152269853"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030163245","display_name":"Weiguang Pang","orcid":"https://orcid.org/0000-0003-0208-4677"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiguang Pang","raw_affiliation_strings":["Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000","institution_ids":["https://openalex.org/I152269853"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103626902","display_name":"G.D. Lv","orcid":null},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangtong Lv","raw_affiliation_strings":["Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000","institution_ids":["https://openalex.org/I152269853"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040772796","display_name":"Youyang Qu","orcid":"https://orcid.org/0000-0002-2944-4647"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Youyang Qu","raw_affiliation_strings":["Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000","institution_ids":["https://openalex.org/I152269853"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026963407","display_name":"Lei Cui","orcid":"https://orcid.org/0000-0002-1932-1440"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Cui","raw_affiliation_strings":["Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security of Ministry of Education,Jinan,China,250000","institution_ids":["https://openalex.org/I152269853"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.676,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.90714766,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"8","issue":"6","first_page":"1307","last_page":"1323"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11338","display_name":"Advancements in Photolithography Techniques","score":0.6115000247955322,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11338","display_name":"Advancements in Photolithography Techniques","score":0.6115000247955322,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14276","display_name":"Power Systems and Technologies","score":0.5230000019073486,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/batch-processing","display_name":"Batch processing","score":0.6315000057220459},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5961999893188477},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.49549999833106995},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.45590001344680786},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4122999906539917},{"id":"https://openalex.org/keywords/job-scheduler","display_name":"Job scheduler","score":0.3833000063896179},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.3774000108242035},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.373199999332428},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.3677999973297119}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8313999772071838},{"id":"https://openalex.org/C172658912","wikidata":"https://www.wikidata.org/wiki/Q661613","display_name":"Batch processing","level":2,"score":0.6315000057220459},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5961999893188477},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.5099999904632568},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.49549999833106995},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4595000147819519},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.45590001344680786},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4122999906539917},{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.3833000063896179},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.373199999332428},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3677999973297119},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.35679998993873596},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.3546999990940094},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.31450000405311584},{"id":"https://openalex.org/C116537","wikidata":"https://www.wikidata.org/wiki/Q2169973","display_name":"Service provider","level":3,"score":0.2842000126838684},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C2776043813","wikidata":"https://www.wikidata.org/wiki/Q1759113","display_name":"Batch production","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C2776029614","wikidata":"https://www.wikidata.org/wiki/Q1146367","display_name":"File size","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C11644782","wikidata":"https://www.wikidata.org/wiki/Q15401790","display_name":"Cost efficiency","level":2,"score":0.2538999915122986},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.2531000077724457},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2524999976158142},{"id":"https://openalex.org/C489000","wikidata":"https://www.wikidata.org/wiki/Q747385","display_name":"Data flow diagram","level":2,"score":0.251800000667572},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.26599/bdma.2025.9020025","is_oa":true,"landing_page_url":"https://doi.org/10.26599/bdma.2025.9020025","pdf_url":null,"source":{"id":"https://openalex.org/S4210209060","display_name":"Big Data Mining and Analytics","issn_l":"2096-0654","issn":["2096-0654"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311901","host_organization_name":"Tsinghua University Press","host_organization_lineage":["https://openalex.org/P4310311901"],"host_organization_lineage_names":["Tsinghua University Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data Mining and Analytics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:3400b184fa1b452e95a806a6ec88cf14","is_oa":true,"landing_page_url":"https://doaj.org/article/3400b184fa1b452e95a806a6ec88cf14","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data Mining and Analytics, Vol 8, Iss 6, Pp 1307-1323 (2025)","raw_type":"article"},{"id":"pmh:oai:figshare.com:article/30283234","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal contribution"}],"best_oa_location":{"id":"doi:10.26599/bdma.2025.9020025","is_oa":true,"landing_page_url":"https://doi.org/10.26599/bdma.2025.9020025","pdf_url":null,"source":{"id":"https://openalex.org/S4210209060","display_name":"Big Data Mining and Analytics","issn_l":"2096-0654","issn":["2096-0654"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311901","host_organization_name":"Tsinghua University Press","host_organization_lineage":["https://openalex.org/P4310311901"],"host_organization_lineage_names":["Tsinghua University Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data Mining and Analytics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1461042731","display_name":null,"funder_award_id":"ZR20221150015","funder_id":"https://openalex.org/F4320324174","funder_display_name":"Natural Science Foundation of Shandong Province"},{"id":"https://openalex.org/G183588231","display_name":null,"funder_award_id":"2022ZD0116800","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"}],"funders":[{"id":"https://openalex.org/F4320324174","display_name":"Natural Science Foundation of Shandong Province","ror":null},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2035301451","https://openalex.org/W2131641675","https://openalex.org/W2791170418","https://openalex.org/W2912924812","https://openalex.org/W2963926728","https://openalex.org/W2982157693","https://openalex.org/W3090350559","https://openalex.org/W3153427360","https://openalex.org/W3153523320","https://openalex.org/W3160021612","https://openalex.org/W4250424857","https://openalex.org/W4280622851","https://openalex.org/W4319063684","https://openalex.org/W4322766882","https://openalex.org/W4365512576","https://openalex.org/W4367595583","https://openalex.org/W4378574344","https://openalex.org/W4385245566","https://openalex.org/W4386262998","https://openalex.org/W4387321091","https://openalex.org/W4389544123","https://openalex.org/W4390561469","https://openalex.org/W4390581548","https://openalex.org/W4390872297","https://openalex.org/W4390874575","https://openalex.org/W4391136507","https://openalex.org/W4393156651","https://openalex.org/W4394952072","https://openalex.org/W4401211704","https://openalex.org/W4402351990","https://openalex.org/W4402811462","https://openalex.org/W4402835780","https://openalex.org/W4402896823","https://openalex.org/W4403201088","https://openalex.org/W4404031343","https://openalex.org/W4406650295"],"related_works":[],"abstract_inverted_index":{"Large":[0,197],"Language":[1,198],"Models":[2],"(LLMs)":[3],"are":[4],"expanding":[5],"their":[6],"applications":[7],"across":[8],"various":[9],"fields,":[10],"including":[11],"Industrial":[12],"Internet":[13],"of":[14,52,123],"Things":[15],"(IIoT),":[16],"where":[17,104],"they":[18],"analyze":[19],"sensor":[20],"data,":[21],"automate":[22],"diagnostics,":[23],"and":[24,45,83,163,179,201,208,221,224],"enhance":[25],"predictive":[26],"maintenance.":[27],"LLM":[28],"inference":[29,39],"is":[30],"provided":[31],"by":[32,148,219,228],"service":[33,88],"providers":[34],"to":[35,48,65,121,144,175,186,195],"users,":[36],"with":[37],"each":[38],"request":[40,98,182,188,216],"undergoing":[41],"two":[42],"phases:":[43],"prefill":[44],"decode.":[46],"Due":[47],"the":[49,112,127,151],"autoregressive":[50],"nature":[51],"generation,":[53],"only":[54],"one":[55],"token":[56],"can":[57,107],"be":[58],"produced":[59],"per":[60],"iteration,":[61],"necessitating":[62],"multiple":[63,73],"iterations":[64],"complete":[66],"a":[67,76,90,115,181,206],"request.":[68],"Typically,":[69],"batch":[70,78,92,117,153],"processing":[71],"groups":[72],"requests":[74,174],"into":[75],"single":[77],"for":[79],"inference,":[80],"improving":[81,160],"throughput":[82],"hardware":[84],"utilization.":[85],"However,":[86],"in":[87,101,133,211],"systems,":[89],"fixed":[91,116],"size":[93,118,154],"presents":[94],"challenges":[95,147],"under":[96],"fluctuating":[97],"volumes,":[99],"particularly":[100],"IIoT":[102],"environments,":[103],"data":[105],"flow":[106],"vary":[108],"significantly.":[109],"Specifically,":[110],"during":[111,126,167],"high-load":[113,168],"periods,":[114,129],"may":[119,131],"lead":[120],"underutilization":[122],"resources,":[124],"while":[125],"low-load":[128],"it":[130],"result":[132],"resource":[134,161],"wastage.":[135],"In":[136],"this":[137],"paper,":[138],"we":[139],"introduce":[140],"FlexiDecode":[141],"Scheduler":[142],"(FDS)":[143],"address":[145],"these":[146],"dynamically":[149],"adjusting":[150],"decoding":[152,177],"based":[155],"on":[156],"system":[157],"load":[158],"conditions,":[159],"utilization,":[162],"reducing":[164],"wait":[165],"time":[166,218],"periods.":[169],"FDS":[170],"prioritizes":[171],"prefilling":[172],"new":[173],"maximize":[176],"efficiency":[178],"employs":[180],"output":[183],"length":[184],"predictor":[185],"optimize":[187],"scheduling,":[189],"minimizing":[190],"End-to-End":[191],"(E2E)":[192],"latency.":[193],"Compared":[194],"virtual":[196],"Model":[199],"(vLLM)":[200],"Sarathi,":[202],"our":[203],"approach":[204],"achieves":[205],"23%":[207],"16%":[209],"reduction":[210],"E2E":[212],"latency,":[213],"improves":[214],"actual":[215],"execution":[217],"34%":[220],"15%,":[222],"respectively,":[223],"increases":[225],"computational":[226],"utilization":[227],"10%.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
