{"id":"https://openalex.org/W7161960723","doi":"https://doi.org/10.48550/arxiv.2605.20863","title":"PlexRL: Cluster-Level Orchestration of Serviceized LLM Execution for RLVR","display_name":"PlexRL: Cluster-Level Orchestration of Serviceized LLM Execution for RLVR","publication_year":2026,"publication_date":"2026-05-20","ids":{"openalex":"https://openalex.org/W7161960723","doi":"https://doi.org/10.48550/arxiv.2605.20863"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.20863","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20863","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.20863","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136667478","display_name":"Yiqi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yiqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136712934","display_name":"Fangzheng Jiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiao, Fangzheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136642377","display_name":"Tian Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Tian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101293574","display_name":"Boyu Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Boyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032497440","display_name":"Hangyu Wang","orcid":"https://orcid.org/0009-0006-0410-5426"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hangyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136690897","display_name":"Qiaoling Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Qiaoling","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019583668","display_name":"Guoteng Wang","orcid":"https://orcid.org/0000-0002-1018-5673"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Guoteng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136718573","display_name":"Zhen Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Zhen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136665372","display_name":"Peng Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Peng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136686788","display_name":"Zhang, Ping, 1965-","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136633378","display_name":"Xiaohe Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Xiaohe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136700009","display_name":"Ziming Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136697361","display_name":"Menghao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Menghao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136625301","display_name":"Yanmin Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Yanmin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136632396","display_name":"Yang You","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"You, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136668166","display_name":"Siyuan Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Siyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.12460000067949295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.12460000067949295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10440000146627426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.08990000188350677,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6279000043869019},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.6121000051498413},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.5698000192642212},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5691999793052673},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.49779999256134033},{"id":"https://openalex.org/keywords/idle","display_name":"Idle","score":0.47600001096725464},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.44839999079704285},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.4262999892234802},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.41429999470710754}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8281999826431274},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6279000043869019},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.6121000051498413},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5968999862670898},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.5698000192642212},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5691999793052673},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.49779999256134033},{"id":"https://openalex.org/C16320812","wikidata":"https://www.wikidata.org/wiki/Q1812200","display_name":"Idle","level":2,"score":0.47600001096725464},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.44839999079704285},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.4262999892234802},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.41429999470710754},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.413100004196167},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C172191483","wikidata":"https://www.wikidata.org/wiki/Q1071806","display_name":"Provisioning","level":2,"score":0.3472000062465668},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3262999951839447},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.30169999599456787},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.30149999260902405},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C65813073","wikidata":"https://www.wikidata.org/wiki/Q1622420","display_name":"High availability","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C513985346","wikidata":"https://www.wikidata.org/wiki/Q270471","display_name":"Virtualization","level":3,"score":0.2619999945163727},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.25189998745918274},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.20863","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20863","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.20863","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20863","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.532151460647583,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"with":[2],"verifiable":[3],"rewards":[4],"(RLVR)":[5],"has":[6],"recently":[7],"unlocked":[8],"strong":[9],"reasoning":[10],"capabilities":[11],"in":[12],"large":[13],"language":[14],"models":[15],"(LLMs),":[16],"triggering":[17],"rapid":[18],"exploration":[19],"of":[20],"new":[21],"algorithms":[22],"and":[23,35,41,85,116,140,150,163],"data.":[24],"However,":[25],"RLVR":[26,77,107],"training":[27,42],"is":[28,68],"notoriously":[29],"inefficient:":[30],"long-tailed":[31],"rollouts,":[32],"tool-induced":[33],"stalls,":[34],"asymmetric":[36],"resource":[37],"requirements":[38],"between":[39],"rollout":[40],"introduce":[43],"substantial":[44],"idle":[45,71,132],"time":[46],"that":[47,65,143],"cannot":[48],"be":[49],"eliminated":[50],"by":[51,156],"job-local":[52],"optimizations":[53],"such":[54],"as":[55],"synchronous":[56],"pipelining,":[57],"asynchronous":[58],"rollout,":[59],"or":[60],"colocated":[61],"execution.":[62],"We":[63],"argue":[64],"this":[66,93],"inefficiency":[67],"structural.":[69],"While":[70],"gaps":[72],"are":[73,80],"unavoidable":[74],"within":[75],"individual":[76],"jobs,":[78],"they":[79],"largely":[81],"anti-correlated":[82],"across":[83,106,127],"jobs":[84,128],"therefore":[86],"exploitable":[87],"at":[88],"the":[89],"cluster":[90,148],"level.":[91],"Leveraging":[92],"observation,":[94],"we":[95],"present":[96],"PlexRL,":[97],"a":[98],"cluster-level":[99],"runtime":[100],"for":[101],"multiplexing":[102],"unified":[103],"LLM":[104,125],"services":[105],"jobs.":[108],"By":[109],"centrally":[110],"managing":[111],"model":[112,136],"placement,":[113],"state":[114],"transitions,":[115],"function-level":[117],"scheduling":[118],"under":[119],"strict":[120],"affinity":[121],"constraints,":[122],"PlexRL":[123,144],"time-slices":[124],"execution":[126],"to":[129],"fill":[130],"otherwise":[131],"periods":[133],"without":[134],"expensive":[135],"migration.":[137],"Our":[138],"implementation":[139],"evaluations":[141],"demonstrate":[142],"significantly":[145],"improves":[146],"effective":[147],"capacity":[149],"reduces":[151],"user":[152],"GPU":[153],"hour":[154],"cost":[155],"maximum":[157],"37.58%":[158],"while":[159],"preserving":[160],"algorithmic":[161],"flexibility":[162],"introducing":[164],"minimal":[165],"per-job":[166],"overhead.":[167]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-22T00:00:00"}
