{"id":"https://openalex.org/W7160063060","doi":"https://doi.org/10.48550/arxiv.2605.00298","title":"Data Deletion Can Help in Adaptive RL","display_name":"Data Deletion Can Help in Adaptive RL","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7160063060","doi":"https://doi.org/10.48550/arxiv.2605.00298"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.00298","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.00298","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.00298","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051971877","display_name":"Param Budhraja","orcid":"https://orcid.org/0009-0007-4311-391X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Budhraja, Param","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030038191","display_name":"Aditya Gangrade","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gangrade, Aditya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005863931","display_name":"Alex Olshevsky","orcid":"https://orcid.org/0000-0002-5852-9789"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olshevsky, Alex","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5048704387","display_name":"Venkatesh Saligrama","orcid":"https://orcid.org/0000-0002-0675-2268"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saligrama, Venkatesh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6830000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6830000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.11999999731779099,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.017100000753998756,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6274999976158142},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5239999890327454},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.4927999973297119},{"id":"https://openalex.org/keywords/empirical-distribution-function","display_name":"Empirical distribution function","score":0.39959999918937683},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.3986000120639801},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.3917999863624573},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.3643999993801117},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.35749998688697815},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.35260000824928284}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6274999976158142},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5440000295639038},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5239999890327454},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.4927999973297119},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41370001435279846},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40880000591278076},{"id":"https://openalex.org/C98385598","wikidata":"https://www.wikidata.org/wiki/Q1339385","display_name":"Empirical distribution function","level":2,"score":0.39959999918937683},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.3986000120639801},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3917999863624573},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3864000141620636},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3831000030040741},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3643999993801117},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.35260000824928284},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.349700003862381},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.3422999978065491},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.33410000801086426},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3264000117778778},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.32260000705718994},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.3107999861240387},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C86426650","wikidata":"https://www.wikidata.org/wiki/Q7452504","display_name":"Sequential estimation","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C107321475","wikidata":"https://www.wikidata.org/wiki/Q5374254","display_name":"Empirical risk minimization","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C55350006","wikidata":"https://www.wikidata.org/wiki/Q237193","display_name":"Exponential distribution","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C101097943","wikidata":"https://www.wikidata.org/wiki/Q5176983","display_name":"Counterintuitive","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.25130000710487366},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.25119999051094055},{"id":"https://openalex.org/C5465570","wikidata":"https://www.wikidata.org/wiki/Q5326898","display_name":"Early stopping","level":3,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.00298","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.00298","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.00298","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.00298","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.748874843120575}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deploying":[0],"reinforcement":[1],"learning":[2],"policies":[3],"in":[4,17,206,224],"the":[5,18,44,55,69,81,88,117,198,202,239,245,264],"real":[6],"world":[7],"requires":[8],"adapting":[9],"to":[10,173,275],"time-varying":[11],"environments.":[12],"We":[13,72],"study":[14],"this":[15,207,234,254],"problem":[16],"contextual":[19],"Markov":[20],"Decision":[21],"Process":[22],"(cMDP)":[23],"framework,":[24],"where":[25],"a":[26,33,47,62,74,85,112,166,175,195,214,258],"family":[27],"of":[28,54,87,142,261],"environments":[29],"is":[30,98,242,249],"indexed":[31],"by":[32,151,156],"low-dimensional":[34],"context":[35,63,67],"unknown":[36],"at":[37,121,204],"test":[38,222],"time.":[39],"The":[40],"standard":[41],"approach":[42],"decomposes":[43],"problem:":[45],"train":[46,199],"so-called":[48],"\"universal":[49],"policy\"":[50],"which":[51,65,143],"assumes":[52],"knowledge":[53],"true":[56],"context,":[57],"then":[58],"pair":[59],"it":[60,164],"with":[61,169,194],"estimator":[64,118],"approximates":[66],"using":[68,103],"observed":[70],"trajectory.":[71],"identify":[73],"simple,":[75],"counterintuitive":[76],"trick":[77],"that":[78,212],"substantially":[79],"improves":[80],"estimator:":[82],"randomly":[83],"delete":[84],"fraction":[86],"training":[89,218,268],"buffer":[90],"after":[91],"each":[92],"round.":[93],"This":[94,147],"works":[95],"because":[96],"data":[97,133],"collected":[99],"across":[100],"multiple":[101],"rounds":[102],"progressively":[104],"better":[105],"policies,":[106],"and":[107,155,184,201,244,269],"older":[108,132],"trajectories":[109],"come":[110],"from":[111],"different":[113],"distribution":[114,200,203,265],"than":[115],"what":[116],"will":[119],"face":[120],"deployment":[122,270],"time;":[123],"random":[124,217],"deletion":[125,186,236,274],"creates":[126],"an":[127],"implicit":[128],"exponential":[129],"decay":[130],"on":[131,158],"while":[134],"preserving":[135],"diversity":[136],"without":[137,179],"requiring":[138],"any":[139],"explicit":[140],"identification":[141],"samples":[144],"are":[145],"stale.":[146],"reduces":[148],"robustness":[149],"gap":[150],"30%":[152],"for":[153,160,273],"MLPs":[154],"6%":[157],"average":[159],"recurrent":[161],"networks.":[162],"Strikingly,":[163],"allows":[165],"narrow":[167],"MLP":[168,177],"5x":[170],"fewer":[171],"parameters":[172],"outperform":[174],"wide":[176],"trained":[178],"deletion.":[180],"To":[181],"understand":[182],"when":[183,238],"why":[185],"helps,":[187],"we":[188,210,232],"analyze":[189],"regularized":[190],"empirical":[191],"risk":[192],"minimization":[193],"mismatch":[196,266],"between":[197,267],"deployment;":[205],"idealized":[208],"setting,":[209],"prove":[211],"removing":[213],"single":[215],"uniformly":[216],"point":[219],"decreases":[220],"expected":[221],"loss":[223],"expectation":[225],"under":[226],"mild":[227],"conditions.":[228],"For":[229],"ridge":[230],"regression":[231],"make":[233],"quantitative:":[235],"helps":[237],"regularization":[240],"coefficient":[241],"moderate":[243],"signal-to-noise":[246],"ratio":[247],"(SNR)":[248],"sufficiently":[250],"low,":[251],"and,":[252],"crucially,":[253],"SNR":[255],"threshold":[256],"gives":[257],"direct":[259],"measure":[260],"how":[262],"large":[263],"must":[271],"be":[272,276],"beneficial.":[277]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-05T00:00:00"}
