{"id":"https://openalex.org/W7141525272","doi":"https://doi.org/10.48550/arxiv.2603.24840","title":"Prune as You Generate: Online Rollout Pruning for Faster and Better RLVR","display_name":"Prune as You Generate: Online Rollout Pruning for Faster and Better RLVR","publication_year":2026,"publication_date":"2026-03-25","ids":{"openalex":"https://openalex.org/W7141525272","doi":"https://doi.org/10.48550/arxiv.2603.24840"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.24840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.24840","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130806488","display_name":"Haobo Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Haobo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130810110","display_name":"Sirui Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Sirui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130790364","display_name":"Ruizhong Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Ruizhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130766439","display_name":"Yuchen Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Yuchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130724506","display_name":"Chen Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130723744","display_name":"Monica Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Monica","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130750106","display_name":"Jingrui He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Jingrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130741153","display_name":"Hanghang Tong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tong, Hanghang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5130806488"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3476000130176544,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3476000130176544,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.07890000194311142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.060100000351667404,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.8108000159263611},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5860000252723694},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5353999733924866},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5077000260353088},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4668000042438507},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.4625000059604645},{"id":"https://openalex.org/keywords/rare-events","display_name":"Rare events","score":0.4562999904155731},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4537999927997589},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.40139999985694885}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8266000151634216},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.8108000159263611},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5860000252723694},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5353999733924866},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.522599995136261},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5077000260353088},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4668000042438507},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.4625000059604645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4603999853134155},{"id":"https://openalex.org/C2777317252","wikidata":"https://www.wikidata.org/wiki/Q18393516","display_name":"Rare events","level":2,"score":0.4562999904155731},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4537999927997589},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.40139999985694885},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.37470000982284546},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.3619999885559082},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.34450000524520874},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.3206999897956848},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.28189998865127563},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.26269999146461487},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.26089999079704285},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.24840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.24840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"with":[2],"Verifiable":[3],"Rewards":[4],"(RLVR)":[5],"has":[6],"significantly":[7],"advanced":[8],"the":[9,42,90,109,150,155],"reasoning":[10],"capabilities":[11],"of":[12,112],"Large":[13],"Language":[14],"Models":[15],"(LLMs).":[16],"However,":[17],"methods":[18],"such":[19],"as":[20],"GRPO":[21,165],"and":[22,60,115,153,161,166,170,189],"DAPO":[23,167],"suffer":[24],"from":[25],"substantial":[26],"computational":[27],"cost,":[28],"since":[29],"they":[30],"rely":[31],"on":[32,168],"sampling":[33],"many":[34,48],"rollouts":[35,84,114,148],"for":[36,158],"each":[37],"prompt.":[38],"Moreover,":[39],"in":[40,196,199],"RLVR":[41,72],"relative":[43],"advantage":[44],"is":[45,204],"often":[46],"sparse:":[47],"samples":[49],"become":[50],"nearly":[51],"all-correct":[52],"or":[53],"all-incorrect,":[54],"yielding":[55,190],"low":[56],"within-group":[57],"reward":[58],"variance":[59],"thus":[61],"weak":[62],"learning":[63,97],"signals.":[64,98],"In":[65],"this":[66],"paper,":[67],"we":[68,141],"introduce":[69],"arrol":[70,100,174],"(Accelerating":[71],"via":[73],"online":[74,78],"Rollout":[75],"Pruning),":[76],"an":[77],"rollout":[79],"pruning":[80,121],"method":[81],"that":[82,146],"prunes":[83,147],"during":[85,135],"generation":[86],"while":[87,182],"explicitly":[88],"steering":[89],"surviving":[91],"ones":[92,157],"more":[93],"correctness-balanced":[94],"to":[95,107,118,131,180,185,192],"enhance":[96],"Specifically,":[99],"trains":[101],"a":[102,143],"lightweight":[103],"quality":[104,125],"head":[105,126],"on-the-fly":[106],"predict":[108],"success":[110],"probability":[111],"partial":[113],"uses":[116],"it":[117],"make":[119],"early":[120],"decisions.":[122],"The":[123,202],"learned":[124],"can":[127],"further":[128],"weigh":[129],"candidates":[130],"improve":[132,139],"inference":[133,151],"accuracy":[134,177,198],"test-time":[136,200],"scaling.":[137,201],"To":[138],"efficiency,":[140],"present":[142],"system":[144],"design":[145],"inside":[149],"engine":[152],"re-batches":[154],"remaining":[156],"log-probability":[159],"computation":[160],"policy":[162],"updates.":[163],"Across":[164],"Qwen-3":[169],"LLaMA-3.2":[171],"models":[172],"(1B-8B),":[173],"improves":[175],"average":[176,197],"by":[178],"+2.30":[179],"+2.99":[181],"achieving":[183],"up":[184,191],"1.7x":[186],"training":[187],"speedup,":[188],"+8.33":[193],"additional":[194],"gains":[195],"code":[203],"available":[205],"at":[206],"https://github.com/Hsu1023/ARRoL.":[207]},"counts_by_year":[],"updated_date":"2026-03-28T06:16:51.555046","created_date":"2026-03-28T00:00:00"}
