{"id":"https://openalex.org/W7157707210","doi":"https://doi.org/10.1145/3805621.3807632","title":"<i> E <scp>arl</scp> : </i> Efficient Agentic RL Post-Training for LLMs under Dynamic Context Lengths","display_name":"<i> E <scp>arl</scp> : </i> Efficient Agentic RL Post-Training for LLMs under Dynamic Context Lengths","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7157707210","doi":"https://doi.org/10.1145/3805621.3807632"},"language":null,"primary_location":{"id":"doi:10.1145/3805621.3807632","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807632","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805621.3807632","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134874074","display_name":"Zheyue Tan","orcid":"https://orcid.org/0009-0002-1700-1053"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Zheyue Tan","raw_affiliation_strings":["Aalto University, Espoo, Finland"],"raw_orcid":"https://orcid.org/0009-0002-1700-1053","affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042475680","display_name":"Tuo Shi","orcid":"https://orcid.org/0000-0003-3685-2099"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Tuo Shi","raw_affiliation_strings":["Aalto University, Espoo, Finland"],"raw_orcid":"https://orcid.org/0000-0003-3685-2099","affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134863645","display_name":"Huining Yuan","orcid":"https://orcid.org/0000-0002-3438-3535"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huining Yuan","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3438-3535","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062497242","display_name":"Zelai Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zelai Xu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5578-199X","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134849474","display_name":"Chao Yu","orcid":"https://orcid.org/0000-0001-6975-0158"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Yu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6975-0158","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134872720","display_name":"Boxun Li","orcid":"https://orcid.org/0000-0002-6370-1723"},"institutions":[{"id":"https://openalex.org/I4210145118","display_name":"Infinitus (China)","ror":"https://ror.org/03pchte23","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210145118"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Boxun Li","raw_affiliation_strings":["Infinigence AI, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6370-1723","affiliations":[{"raw_affiliation_string":"Infinigence AI, Beijing, China","institution_ids":["https://openalex.org/I4210145118"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134861232","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0001-6108-5157"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6108-5157","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025795919","display_name":"Bo Zhao","orcid":"https://orcid.org/0000-0002-0768-3444"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Bo Zhao","raw_affiliation_strings":["Aalto University, Espoo, Finland"],"raw_orcid":"https://orcid.org/0000-0002-0768-3444","affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5134874074"],"corresponding_institution_ids":["https://openalex.org/I9927081"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.96030814,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"41","last_page":"48"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1979999989271164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1979999989271164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11559999734163284,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.06120000034570694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6930000185966492},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6129000186920166},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.527999997138977},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.4447000026702881},{"id":"https://openalex.org/keywords/truncation","display_name":"Truncation (statistics)","score":0.4196999967098236},{"id":"https://openalex.org/keywords/context-effect","display_name":"Context effect","score":0.2856000065803528}],"concepts":[{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6930000185966492},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6347000002861023},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6129000186920166},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.527999997138977},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.4447000026702881},{"id":"https://openalex.org/C106195933","wikidata":"https://www.wikidata.org/wiki/Q7847935","display_name":"Truncation (statistics)","level":2,"score":0.4196999967098236},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32280001044273376},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.30169999599456787},{"id":"https://openalex.org/C76188268","wikidata":"https://www.wikidata.org/wiki/Q1783165","display_name":"Context effect","level":3,"score":0.2856000065803528},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2822999954223633},{"id":"https://openalex.org/C3017912452","wikidata":"https://www.wikidata.org/wiki/Q5970087","display_name":"Scale effects","level":3,"score":0.28060001134872437},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C108170787","wikidata":"https://www.wikidata.org/wiki/Q3951828","display_name":"Agency (philosophy)","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.26010000705718994},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805621.3807632","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807632","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805621.3807632","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807632","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W4226278401","https://openalex.org/W4280611847","https://openalex.org/W4387113741","https://openalex.org/W4389755588","https://openalex.org/W4403883066","https://openalex.org/W4406297511","https://openalex.org/W4414809837","https://openalex.org/W4416365805","https://openalex.org/W4416679313","https://openalex.org/W4417078041","https://openalex.org/W4417097658","https://openalex.org/W7077064451"],"related_works":[],"abstract_inverted_index":{"Agentic":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"is":[4,20],"increasingly":[5],"used":[6],"to":[7],"post-train":[8],"large":[9],"language":[10],"models":[11],"(LLMs)":[12],"into":[13],"multi-turn":[14],"agents.":[15],"However,":[16],"scaling":[17],"agentic":[18],"RL":[19],"challenging":[21],"because":[22],"the":[23],"effective":[24],"context":[25],"length":[26,75],"grows":[27],"during":[28],"training:":[29],"agents":[30],"become":[31],"more":[32],"verbose":[33],"and":[34,50,65,71,76],"rollouts":[35],"accumulate":[36],"long":[37],"histories.":[38],"This":[39],"context-length":[40],"explosion":[41],"creates":[42],"two":[43],"coupled":[44],"systems":[45],"bottlenecks:":[46],"(i)":[47],"attention":[48],"compute":[49],"memory":[51],"scale":[52,73],"with":[53,74],"length,":[54],"causing":[55],"truncation":[56],"or":[57],"out-of-memory":[58],"(OOM)":[59],"events":[60],"that":[61],"can":[62],"destabilize":[63],"learning;":[64],"(ii)":[66],"experience":[67],"tensors":[68],"(e.g.,":[69],"tokens":[70],"log-probabilities)":[72],"make":[77],"cross-device":[78],"exchange":[79],"a":[80],"dominant":[81],"cost.":[82]},"counts_by_year":[],"updated_date":"2026-04-30T06:11:10.768123","created_date":"2026-04-30T00:00:00"}
