{"id":"https://openalex.org/W7118199371","doi":"https://doi.org/10.1109/tmc.2026.3650838","title":"Joint Optimization of Dynamic Batching and Adaptive Partitioning for Distributed LLMs Inference in Mobile Edge Computing","display_name":"Joint Optimization of Dynamic Batching and Adaptive Partitioning for Distributed LLMs Inference in Mobile Edge Computing","publication_year":2026,"publication_date":"2026-01-05","ids":{"openalex":"https://openalex.org/W7118199371","doi":"https://doi.org/10.1109/tmc.2026.3650838"},"language":null,"primary_location":{"id":"doi:10.1109/tmc.2026.3650838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmc.2026.3650838","pdf_url":null,"source":{"id":"https://openalex.org/S69141925","display_name":"IEEE Transactions on Mobile Computing","issn_l":"1536-1233","issn":["1536-1233","1558-0660","2161-9875"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Mobile Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004966176","display_name":"Tong Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tong Zheng","raw_affiliation_strings":["School of Computer Science and Engineering, Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0002-3247-0846","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121927979","display_name":"Yuanguo Bi","orcid":null},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanguo Bi","raw_affiliation_strings":["School of Computer Science and Engineering, Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0002-8424-8542","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070719868","display_name":"Guangjie Han","orcid":"https://orcid.org/0000-0002-6921-7369"},"institutions":[{"id":"https://openalex.org/I163340411","display_name":"Hohai University","ror":"https://ror.org/01wd4xt90","country_code":"CN","type":"education","lineage":["https://openalex.org/I163340411"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangjie Han","raw_affiliation_strings":["Key Laboratory of Maritime Intelligent Network Information Technology, Ministry of Education, Hohai University, Changzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-6921-7369","affiliations":[{"raw_affiliation_string":"Key Laboratory of Maritime Intelligent Network Information Technology, Ministry of Education, Hohai University, Changzhou, China","institution_ids":["https://openalex.org/I163340411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027495587","display_name":"Tianao Xiang","orcid":"https://orcid.org/0000-0003-4374-2314"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianao Xiang","raw_affiliation_strings":["School of Computer Science and Engineering, Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0003-4374-2314","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114159272","display_name":"Lexi Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I6507939","display_name":"China United Network Communications Group (China)","ror":"https://ror.org/028w99c90","country_code":"CN","type":"company","lineage":["https://openalex.org/I6507939"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lexi Xu","raw_affiliation_strings":["Research Institute, China United Network Communications Corporation, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4338-7252","affiliations":[{"raw_affiliation_string":"Research Institute, China United Network Communications Corporation, Beijing, China","institution_ids":["https://openalex.org/I6507939"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019915847","display_name":"Qiang He","orcid":"https://orcid.org/0000-0002-1820-6141"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang He","raw_affiliation_strings":["School of Computer Science and Engineering, Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0002-1820-6141","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114010808","display_name":"Liang Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I125904092","display_name":"Shenyang Aerospace University","ror":"https://ror.org/02423gm04","country_code":"CN","type":"education","lineage":["https://openalex.org/I125904092"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Zhao","raw_affiliation_strings":["School of Computer Science, Shenyang Aerospace University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0001-5829-6850","affiliations":[{"raw_affiliation_string":"School of Computer Science, Shenyang Aerospace University, Shenyang, China","institution_ids":["https://openalex.org/I125904092"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5004966176"],"corresponding_institution_ids":["https://openalex.org/I9224756"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02173738,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"25","issue":"6","first_page":"8747","last_page":"8763"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.25870001316070557,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.25870001316070557,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.12939999997615814,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.03970000147819519,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6532999873161316},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.5148000121116638},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.49709999561309814},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4722999930381775},{"id":"https://openalex.org/keywords/mobile-edge-computing","display_name":"Mobile edge computing","score":0.47200000286102295},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.42089998722076416},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.39469999074935913},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.3671000003814697},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.3569999933242798}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8787000179290771},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6532999873161316},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6438999772071838},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.5148000121116638},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.49709999561309814},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4722999930381775},{"id":"https://openalex.org/C2776061582","wikidata":"https://www.wikidata.org/wiki/Q25325231","display_name":"Mobile edge computing","level":3,"score":0.47200000286102295},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.42089998722076416},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.39469999074935913},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.3671000003814697},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C144543869","wikidata":"https://www.wikidata.org/wiki/Q2738570","display_name":"Mobile computing","level":2,"score":0.35659998655319214},{"id":"https://openalex.org/C158207573","wikidata":"https://www.wikidata.org/wiki/Q5747224","display_name":"Heterogeneous network","level":4,"score":0.3555999994277954},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.3465999960899353},{"id":"https://openalex.org/C127964446","wikidata":"https://www.wikidata.org/wiki/Q1092142","display_name":"Computational resource","level":3,"score":0.34150001406669617},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.3278999924659729},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2962000072002411},{"id":"https://openalex.org/C106365562","wikidata":"https://www.wikidata.org/wiki/Q3078360","display_name":"Radio access network","level":4,"score":0.290800005197525},{"id":"https://openalex.org/C95491727","wikidata":"https://www.wikidata.org/wiki/Q992968","display_name":"Mobile telephony","level":3,"score":0.29019999504089355},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.2856999933719635},{"id":"https://openalex.org/C108037233","wikidata":"https://www.wikidata.org/wiki/Q11375","display_name":"Wireless network","level":3,"score":0.28459998965263367},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmc.2026.3650838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmc.2026.3650838","pdf_url":null,"source":{"id":"https://openalex.org/S69141925","display_name":"IEEE Transactions on Mobile Computing","issn_l":"1536-1233","issn":["1536-1233","1558-0660","2161-9875"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Mobile Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.40855929255485535,"display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G6074469635","display_name":null,"funder_award_id":"62471121","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2044033049","https://openalex.org/W3170672970","https://openalex.org/W3200083976","https://openalex.org/W3214374352","https://openalex.org/W4281554877","https://openalex.org/W4286377419","https://openalex.org/W4289821779","https://openalex.org/W4385245566","https://openalex.org/W4386385424","https://openalex.org/W4391164333","https://openalex.org/W4391986993","https://openalex.org/W4392901673","https://openalex.org/W4393184767","https://openalex.org/W4400111235","https://openalex.org/W4400447774","https://openalex.org/W4400726969","https://openalex.org/W4400770592","https://openalex.org/W4401211704","https://openalex.org/W4401415014","https://openalex.org/W4402350363","https://openalex.org/W4402592509","https://openalex.org/W4402811232","https://openalex.org/W4404101447","https://openalex.org/W4404562740","https://openalex.org/W4405934565","https://openalex.org/W4406207083","https://openalex.org/W4407218342","https://openalex.org/W4408017080","https://openalex.org/W4408793796","https://openalex.org/W4412412131","https://openalex.org/W4413180548","https://openalex.org/W4414008559","https://openalex.org/W4417439031","https://openalex.org/W7084103609","https://openalex.org/W7124159067"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"are":[4],"revolutionizing":[5],"various":[6],"fields":[7],"due":[8],"to":[9,55,57,109,131,158,216],"their":[10,15],"powerful":[11],"generation":[12],"capabilities.":[13],"However,":[14,51],"immense":[16],"computational":[17],"complexity":[18],"poses":[19],"significant":[20],"challenges":[21],"in":[22,65,211],"resource":[23,114,118,163,213],"consumption,":[24],"inference":[25,111,148,204],"latency,":[26],"and":[27,60,81,113,119,124,162,208],"data":[28],"privacy":[29],"for":[30,90,122,178,186],"traditional":[31],"cloud-centric":[32],"deployments.":[33],"Edge":[34,67],"artificial":[35],"intelligence":[36],"(Edge-AI)":[37],"offers":[38],"promising":[39],"LLMs":[40,91],"deployment":[41,106],"solutions":[42],"by":[43,149,206],"leveraging":[44],"distributed":[45],"resources":[46,64,97],"at":[47],"the":[48,144,151,160,196,217],"network":[49,191],"edge.":[50],"existing":[52],"approaches":[53],"struggle":[54],"adapt":[56],"dynamic":[58,137,190],"workloads":[59],"efficiently":[61],"utilize":[62],"heterogeneous":[63,117],"Mobile":[66],"Computing":[68],"(MEC)":[69],"environments.":[70,192],"This":[71],"paper":[72],"proposes":[73],"a":[74,104,136],"<underline":[75,78,82,85],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[76,79,83,86],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Dy</u>namic":[77],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">B</u>atching":[80],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">A</u>daptive":[84],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">P</u>artitioning":[87],"(DyBAP)":[88],"scheme":[89],"deployment,":[92],"which":[93,127],"utilizes":[94],"ubiquitous":[95],"geo-distributed":[96],"via":[98],"end-edge-cloud":[99,218],"collaboration.":[100],"Firstly,":[101],"we":[102,134],"formulate":[103],"collaboration":[105,219],"optimization":[107,140,168],"problem":[108],"minimize":[110],"latency":[112,123,161,205],"usage":[115],"under":[116],"user":[120],"requirements":[121],"accuracy":[125],"constraints,":[126],"is":[128,176],"NP-hard.":[129],"Secondly,":[130],"solve":[132],"this,":[133],"develop":[135],"batch":[138,145],"fusion":[139],"algorithm":[141,169],"that":[142],"optimizes":[143],"size":[146],"of":[147,155,198],"utilizing":[150],"parallel":[152],"processing":[153],"power":[154],"computing":[156],"units":[157],"balance":[159],"usage.":[164],"A":[165],"block-aware":[166],"partition":[167],"based":[170],"on":[171],"multi-agent":[172],"reinforcement":[173],"learning":[174],"(MARL)":[175],"proposed":[177],"efficient":[179],"transformer":[180],"block":[181],"allocation,":[182],"integrating":[183],"mobility":[184],"awareness":[185],"optimal":[187],"partitioning":[188],"across":[189],"Simulation":[193],"results":[194],"demonstrate":[195],"superiority":[197],"DyBAP":[199],"over":[200],"other":[201],"benchmarks,":[202],"reducing":[203],"17.94%":[207],"saving":[209],"11.12%":[210],"memory":[212],"consumption":[214],"compared":[215],"approaches.":[220]},"counts_by_year":[],"updated_date":"2026-05-09T06:09:20.037420","created_date":"2026-01-05T00:00:00"}
