{"id":"https://openalex.org/W7134056205","doi":"https://doi.org/10.48550/arxiv.2603.05087","title":"PromptTuner: SLO-Aware Elastic System for LLM Prompt Tuning","display_name":"PromptTuner: SLO-Aware Elastic System for LLM Prompt Tuning","publication_year":2026,"publication_date":"2026-03-05","ids":{"openalex":"https://openalex.org/W7134056205","doi":"https://doi.org/10.48550/arxiv.2603.05087"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.05087","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128230937","display_name":"Wei Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128277641","display_name":"Peng Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Peng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083116172","display_name":"Dmitrii Ustiugov","orcid":"https://orcid.org/0000-0003-3156-010X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ustiugov, Dmitrii","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128251949","display_name":"Tianwei Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Tianwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128242097","display_name":"Yonggang Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Yonggang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.29760000109672546,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.29760000109672546,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.1387999951839447,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.049400001764297485,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/provisioning","display_name":"Provisioning","score":0.6556000113487244},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.5015000104904175},{"id":"https://openalex.org/keywords/downstream","display_name":"Downstream (manufacturing)","score":0.4918000102043152},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.4903999865055084},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4343999922275543},{"id":"https://openalex.org/keywords/resource-management","display_name":"Resource management (computing)","score":0.4205000102519989},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4009999930858612}],"concepts":[{"id":"https://openalex.org/C172191483","wikidata":"https://www.wikidata.org/wiki/Q1071806","display_name":"Provisioning","level":2,"score":0.6556000113487244},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6248000264167786},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.5015000104904175},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.4918000102043152},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.4903999865055084},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.4494999945163727},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4343999922275543},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.4205000102519989},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4009999930858612},{"id":"https://openalex.org/C2777958785","wikidata":"https://www.wikidata.org/wiki/Q17120940","display_name":"Resource efficiency","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.3246999979019165},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.2906999886035919},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C3017813396","wikidata":"https://www.wikidata.org/wiki/Q17078173","display_name":"Resource constraints","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C198783460","wikidata":"https://www.wikidata.org/wiki/Q629173","display_name":"Management system","level":2,"score":0.257999986410141}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.05087","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.05087","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.05087","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.05087","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.6043436527252197,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Prompt":[0,101],"tuning":[1,32,76],"has":[2],"become":[3],"a":[4,100],"prominent":[5],"strategy":[6],"for":[7,30,57,73],"enhancing":[8],"the":[9,27,110,127],"performance":[10],"of":[11,112],"Large":[12],"Language":[13],"Models":[14],"(LLMs)":[15],"on":[16,34],"downstream":[17,35],"tasks.":[18,36],"Many":[19],"IT":[20],"enterprises":[21],"now":[22],"offer":[23],"Prompt-Tuning-as-a-Service":[24],"to":[25,41,69,88,103,108,120,125,152],"fulfill":[26],"growing":[28],"demand":[29],"prompt":[31,75,91,113],"LLMs":[33],"Their":[37],"primary":[38],"objective":[39],"is":[40],"satisfy":[42],"users":[43],"Service":[44],"Level":[45],"Objectives":[46],"(SLOs)":[47],"while":[48],"reducing":[49],"resource":[50,61,123,131],"provisioning":[51],"costs.":[52,132],"Nevertheless,":[53],"our":[54,134],"characterization":[55],"analysis":[56],"existing":[58],"deep":[59],"learning":[60],"management":[62],"systems":[63],"reveals":[64],"that":[65],"they":[66],"are":[67],"insufficient":[68],"optimize":[70,89],"these":[71],"objectives":[72],"LLM":[74,90],"workloads.":[77],"In":[78,133],"this":[79],"paper,":[80],"we":[81],"introduce":[82],"PromptTuner,":[83],"an":[84],"SLO-aware":[85],"elastic":[86],"system":[87],"tuning.":[92,114],"It":[93],"contains":[94],"two":[95],"innovations.":[96],"(1)":[97],"We":[98,116],"design":[99],"Bank":[102],"identify":[104],"efficient":[105],"initial":[106],"prompts":[107],"expedite":[109],"convergence":[111],"(2)":[115],"develop":[117],"aWorkload":[118],"Scheduler":[119],"enable":[121],"fast":[122],"allocation":[124],"reduce":[126],"SLO":[128,138],"violation":[129],"and":[130,142,144,149,154],"evaluation,":[135],"PromptTuner":[136],"reduces":[137],"violations":[139],"by":[140,147],"4.0x":[141],"7.9x,":[143],"lowers":[145],"costs":[146],"1.6x":[148],"4.5x,":[150],"compared":[151],"INFless":[153],"ElasticFlow":[155],"respectively.":[156]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-07T00:00:00"}
