{"id":"https://openalex.org/W7134903663","doi":"https://doi.org/10.1145/3779212.3790172","title":"History Doesn't Repeat Itself but Rollouts Rhyme: Accelerating Reinforcement Learning with RhymeRL","display_name":"History Doesn't Repeat Itself but Rollouts Rhyme: Accelerating Reinforcement Learning with RhymeRL","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7134903663","doi":"https://doi.org/10.1145/3779212.3790172"},"language":null,"primary_location":{"id":"doi:10.1145/3779212.3790172","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790172","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3779212.3790172","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122984255","display_name":"Jingkai He","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingkai He","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0005-9024-7588","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128718387","display_name":"Tianjian Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tianjian Li","raw_affiliation_strings":["ByteDance, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0004-0985-8684","affiliations":[{"raw_affiliation_string":"ByteDance, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051943243","display_name":"Erhu Feng","orcid":"https://orcid.org/0009-0006-5957-3024"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Erhu Feng","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0006-5957-3024","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102829139","display_name":"Dong Du","orcid":"https://orcid.org/0000-0002-7945-8430"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Du","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-7945-8430","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128706445","display_name":"Qian Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian Liu","raw_affiliation_strings":["ByteDance, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0004-1230-130X","affiliations":[{"raw_affiliation_string":"ByteDance, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tao Liu","orcid":"https://orcid.org/0009-0000-8150-5729"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Liu","raw_affiliation_strings":["ByteDance, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0000-8150-5729","affiliations":[{"raw_affiliation_string":"ByteDance, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yubin Xia","orcid":"https://orcid.org/0000-0001-6558-5298"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yubin Xia","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-6558-5298","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":null,"display_name":"Haibo Chen","orcid":"https://orcid.org/0000-0002-9720-0361"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibo Chen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-9720-0361","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31862926,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"929","last_page":"945"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5461999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5461999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.05139999836683273,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.042100001126527786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7910000085830688},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6276999711990356},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5756999850273132},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5442000031471252},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4805000126361847},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.45500001311302185},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.43540000915527344},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4169999957084656}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7910000085830688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.765999972820282},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6276999711990356},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5756999850273132},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5442000031471252},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4805000126361847},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45969998836517334},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.45500001311302185},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.43540000915527344},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4169999957084656},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3995000123977661},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39590001106262207},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3853999972343445},{"id":"https://openalex.org/C46355384","wikidata":"https://www.wikidata.org/wiki/Q726686","display_name":"Compromise","level":2,"score":0.36079999804496765},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.351500004529953},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C106195933","wikidata":"https://www.wikidata.org/wiki/Q7847935","display_name":"Truncation (statistics)","level":2,"score":0.33390000462532043},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.31690001487731934},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2678999900817871}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3779212.3790172","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790172","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3779212.3790172","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790172","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2059513841","https://openalex.org/W2133638675","https://openalex.org/W2533248932","https://openalex.org/W2626757163","https://openalex.org/W4387321091","https://openalex.org/W4393159597","https://openalex.org/W4395112660","https://openalex.org/W4403883066","https://openalex.org/W4410636571","https://openalex.org/W4416037139"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,20,65,79,124,157,182,214],"rapid":[2],"advancement":[3],"of":[4,23,159,184],"large":[5],"language":[6],"models":[7],"(LLMs),":[8],"reinforcement":[9],"learning":[10],"(RL)":[11],"has":[12],"emerged":[13],"as":[14],"a":[15,109,150,176,202],"pivotal":[16],"methodology":[17],"for":[18,102],"enhancing":[19],"reasoning":[21],"capabilities":[22],"LLMs.":[24],"Unlike":[25],"traditional":[26],"pre-training":[27],"approaches,":[28],"RL":[29,46,67,131,136,215],"encompasses":[30],"multiple":[31],"stages:":[32],"rollout,":[33],"reward,":[34],"and":[35,92],"training,":[36],"which":[37],"necessitates":[38],"collaboration":[39],"among":[40,191],"various":[41],"worker":[42],"types.":[43],"However,":[44],"current":[45],"systems":[47],"continue":[48],"to":[49,56,70,134,143,164,169,188,201],"grapple":[50],"with":[51,138],"substantial":[52],"GPU":[53,84],"underutilization,":[54],"due":[55,69],"two":[57,139],"primary":[58],"factors:":[59],"(1)":[60],"The":[61],"rollout":[62,76,113,145,161,171,186,192],"stage":[63],"dominates":[64],"overall":[66],"process":[68],"test-time":[71],"scaling;":[72],"(2)":[73],"Imbalances":[74],"in":[75,83],"lengths":[77],"(within":[78],"same":[80],"batch)":[81],"result":[82],"bubbles.":[85],"While":[86],"prior":[87],"solutions":[88],"like":[89],"asynchronous":[90],"execution":[91],"truncation":[93],"offer":[94],"partial":[95],"relief,":[96],"they":[97],"may":[98],"compromise":[99],"training":[100,120,137],"accuracy":[101,211],"efficiency.":[103],"Our":[104],"key":[105,140],"insight":[106],"stems":[107],"from":[108],"previously":[110],"overlooked":[111],"observation:":[112],"responses":[114],"exhibit":[115],"remarkable":[116],"similarity":[117,158,183],"across":[118],"adjacent":[119],"epochs.":[121],"Based":[122],"on":[123],"insight,":[125],"we":[126,147,173],"introduce":[127,174],"RhymeRL,":[128],"an":[129],"LLM":[130],"system":[132],"designed":[133],"accelerate":[135],"innovations.":[141],"First,":[142],"enhance":[144],"generation,":[146],"present":[148],"HistoSpec,":[149],"speculative":[151],"decoding":[152],"inference":[153],"engine":[154],"that":[155,180,197],"utilizes":[156],"historical":[160,185],"token":[162],"sequences":[163],"obtain":[165],"accurate":[166],"drafts.":[167],"Second,":[168],"tackle":[170],"bubbles,":[172],"HistoPipe,":[175],"two-tier":[177],"scheduling":[178],"strategy":[179],"leverages":[181],"distributions":[187],"balance":[189],"workload":[190],"workers.":[193],"Experimental":[194],"results":[195],"demonstrate":[196],"RhymeRL":[198],"achieves":[199],"up":[200],"2.6x":[203],"performance":[204],"improvement":[205],"over":[206],"existing":[207],"methods,":[208],"without":[209],"compromising":[210],"or":[212],"modifying":[213],"paradigm.":[216]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-12T00:00:00"}
