{"id":"https://openalex.org/W4409581432","doi":"https://doi.org/10.1109/tsc.2025.3562362","title":"Enhancing LLM QoS Through Cloud-Edge Collaboration: A Diffusion-Based Multi-Agent Reinforcement Learning Approach","display_name":"Enhancing LLM QoS Through Cloud-Edge Collaboration: A Diffusion-Based Multi-Agent Reinforcement Learning Approach","publication_year":2025,"publication_date":"2025-04-18","ids":{"openalex":"https://openalex.org/W4409581432","doi":"https://doi.org/10.1109/tsc.2025.3562362"},"language":"en","primary_location":{"id":"doi:10.1109/tsc.2025.3562362","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsc.2025.3562362","pdf_url":null,"source":{"id":"https://openalex.org/S204223317","display_name":"IEEE Transactions on Services Computing","issn_l":"1939-1374","issn":["1939-1374","2372-0204"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Services Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100356095","display_name":"Zhi Yao","orcid":"https://orcid.org/0000-0003-1382-225X"},"institutions":[{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhi Yao","raw_affiliation_strings":["School of Artificial Intelligence, Beijing Normal University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing Normal University, Beijing, China","institution_ids":["https://openalex.org/I25254941"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067856743","display_name":"Zhiqing Tang","orcid":"https://orcid.org/0000-0002-9375-4818"},"institutions":[{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiqing Tang","raw_affiliation_strings":["Institute of Artificial Intelligence and Future Networks, Beijing Normal University, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Future Networks, Beijing Normal University, Zhuhai, China","institution_ids":["https://openalex.org/I25254941"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019284302","display_name":"Wenmian Yang","orcid":"https://orcid.org/0000-0001-8493-4449"},"institutions":[{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenmian Yang","raw_affiliation_strings":["Institute of Artificial Intelligence and Future Networks, Beijing Normal University, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Future Networks, Beijing Normal University, Zhuhai, China","institution_ids":["https://openalex.org/I25254941"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101752580","display_name":"Weijia Jia","orcid":"https://orcid.org/0000-0003-1000-3937"},"institutions":[{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weijia Jia","raw_affiliation_strings":["Institute of Artificial Intelligence and Future Networks, Beijing Normal University, Zhuhai, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Future Networks, Beijing Normal University, Zhuhai, China","institution_ids":["https://openalex.org/I25254941"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100356095"],"corresponding_institution_ids":["https://openalex.org/I25254941"],"apc_list":null,"apc_paid":null,"fwci":27.8688,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.99462815,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"18","issue":"3","first_page":"1412","last_page":"1427"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9340999722480774,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9340999722480774,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8241451978683472},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8077463507652283},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.7097355723381042},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.5812007188796997},{"id":"https://openalex.org/keywords/quality-of-service","display_name":"Quality of service","score":0.5004422664642334},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4782392382621765},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.43427667021751404},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.4313387870788574},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.3475494384765625},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2859061360359192}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8241451978683472},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8077463507652283},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.7097355723381042},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.5812007188796997},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.5004422664642334},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4782392382621765},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.43427667021751404},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.4313387870788574},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3475494384765625},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2859061360359192},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsc.2025.3562362","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsc.2025.3562362","pdf_url":null,"source":{"id":"https://openalex.org/S204223317","display_name":"IEEE Transactions on Services Computing","issn_l":"1939-1374","issn":["1939-1374","2372-0204"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Services Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1430800624","display_name":null,"funder_award_id":"62302048","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8160964939","display_name":null,"funder_award_id":"62272050","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326279","display_name":"Department of Education of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W2263893300","https://openalex.org/W2768629321","https://openalex.org/W2898155611","https://openalex.org/W2951360122","https://openalex.org/W2965834989","https://openalex.org/W2971870892","https://openalex.org/W3023295509","https://openalex.org/W3174809957","https://openalex.org/W4206172874","https://openalex.org/W4220897963","https://openalex.org/W4284892042","https://openalex.org/W4285225959","https://openalex.org/W4293210321","https://openalex.org/W4312285477","https://openalex.org/W4312817101","https://openalex.org/W4366341216","https://openalex.org/W4384916684","https://openalex.org/W4385245566","https://openalex.org/W4385287322","https://openalex.org/W4385571011","https://openalex.org/W4386858689","https://openalex.org/W4386858868","https://openalex.org/W4387245356","https://openalex.org/W4388483039","https://openalex.org/W4388561289","https://openalex.org/W4389519226","https://openalex.org/W4390263770","https://openalex.org/W4390659326","https://openalex.org/W4390828296","https://openalex.org/W4390828922","https://openalex.org/W4391019625","https://openalex.org/W4392152415","https://openalex.org/W4392902389","https://openalex.org/W4396817294","https://openalex.org/W4399939100","https://openalex.org/W4400641571","https://openalex.org/W4401211704","https://openalex.org/W4401725315","https://openalex.org/W4401943272","https://openalex.org/W4402050440","https://openalex.org/W4403421590","https://openalex.org/W4404181035","https://openalex.org/W4405099035","https://openalex.org/W6779823529","https://openalex.org/W6788175385","https://openalex.org/W6790690058","https://openalex.org/W6796589144","https://openalex.org/W6840380725","https://openalex.org/W6841549819","https://openalex.org/W6842958722","https://openalex.org/W6847478871","https://openalex.org/W6850503672","https://openalex.org/W6852686595","https://openalex.org/W6856800273","https://openalex.org/W6869417273","https://openalex.org/W6872445069","https://openalex.org/W6877022392"],"related_works":["https://openalex.org/W4313339048","https://openalex.org/W3176734149","https://openalex.org/W3201779876","https://openalex.org/W3113627641","https://openalex.org/W3191964704","https://openalex.org/W2918879532","https://openalex.org/W3083220997","https://openalex.org/W2885461866","https://openalex.org/W3162654428","https://openalex.org/W2901937988"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4],"widely":[5],"used":[6],"across":[7],"various":[8,124],"domains,":[9],"but":[10],"deploying":[11],"them":[12],"in":[13,61,170,182],"cloud":[14,172],"data":[15],"centers":[16],"often":[17],"leads":[18],"to":[19,123,158,166,228],"significant":[20],"response":[21,49,101],"delays":[22,200],"and":[23,51,120,141,201,221],"high":[24],"costs,":[25],"undermining":[26],"Quality":[27],"of":[28,207],"Service":[29],"(QoS)":[30],"at":[31,40,93],"the":[32,41,94,115,131,160,168,171,177,229],"network":[33,157],"edge.":[34],"Although":[35],"caching":[36],"LLM":[37,90,161,169],"request":[38,91,162,167],"results":[39,92,175,189],"edge":[42,95,185,205],"using":[43,96],"vector":[44,97,179],"databases":[45],"can":[46],"greatly":[47],"reduce":[48],"times":[50,102],"costs":[52],"for":[53,103,204,218,224],"similar":[54,105,219],"requests,":[55],"this":[56],"approach":[57],"has":[58],"been":[59],"overlooked":[60],"prior":[62],"research.":[63],"To":[64],"address":[65],"this,":[66],"we":[67,129],"propose":[68],"a":[69,136,154,183],"novel":[70],"<underline":[71,79,83],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[72,76,80,84],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">V</u>ector":[73],"database-assisted":[74],"cloud-<underline":[75],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">E</u>dge":[77],"collaborative":[78],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">L</u>LM":[81],"QoS":[82,132],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">O</u>ptimization":[85],"(VELO)":[86],"framework":[87],"that":[88,109,191],"caches":[89],"databases,":[98],"thereby":[99],"reducing":[100,199],"subsequent":[104],"requests.":[106],"Unlike":[107],"methods":[108],"modify":[110],"LLMs":[111],"directly,":[112],"VELO":[113,192],"leaves":[114],"LLM's":[116],"internal":[117],"structure":[118],"intact":[119],"is":[121],"applicable":[122],"LLMs.":[125,208],"Building":[126],"on":[127,146,216],"VELO,":[128],"formulate":[130],"optimization":[133],"problem":[134],"as":[135],"Markov":[137],"Decision":[138],"Process":[139],"(MDP)":[140],"design":[142],"an":[143],"algorithm":[144,152,211],"based":[145],"Multi-Agent":[147],"Reinforcement":[148],"Learning":[149],"(MARL).":[150],"Our":[151,209],"employs":[153],"diffusion-based":[155],"policy":[156],"extract":[159],"features,":[163],"determining":[164],"whether":[165],"or":[173],"retrieve":[174],"from":[176],"edge's":[178],"database.":[180],"Implemented":[181],"real":[184],"system,":[186],"our":[187],"experimental":[188],"demonstrate":[190],"significantly":[193],"enhances":[194],"user":[195],"satisfaction":[196],"by":[197,214,222],"simultaneously":[198],"resource":[202],"consumption":[203],"users":[206],"DLRS":[210],"improves":[212],"performance":[213],"15.0%":[215],"average":[217],"requests":[220,226],"14.6%":[223],"new":[225],"compared":[227],"baselines.":[230]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
