{"id":"https://openalex.org/W4417098782","doi":"https://doi.org/10.48550/arxiv.2508.01287","title":"Exploitation Is All You Need... for Exploration","display_name":"Exploitation Is All You Need... for Exploration","publication_year":2025,"publication_date":"2025-08-02","ids":{"openalex":"https://openalex.org/W4417098782","doi":"https://doi.org/10.48550/arxiv.2508.01287"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2508.01287","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.01287","pdf_url":"https://arxiv.org/pdf/2508.01287","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2508.01287","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116038848","display_name":"Micah Rentschler","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rentschler, Micah","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020253845","display_name":"Jesse Roberts","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roberts, Jesse","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.4081000089645386,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.4081000089645386,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.2806999981403351,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10646","display_name":"Experimental Behavioral Economics Studies","score":0.04580000042915344,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/incentive","display_name":"Incentive","score":0.6503000259399414},{"id":"https://openalex.org/keywords/dilemma","display_name":"Dilemma","score":0.5752999782562256},{"id":"https://openalex.org/keywords/thompson-sampling","display_name":"Thompson sampling","score":0.45910000801086426},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.444599986076355},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.33959999680519104},{"id":"https://openalex.org/keywords/greedy-algorithm","display_name":"Greedy algorithm","score":0.32850000262260437}],"concepts":[{"id":"https://openalex.org/C29122968","wikidata":"https://www.wikidata.org/wiki/Q1414816","display_name":"Incentive","level":2,"score":0.6503000259399414},{"id":"https://openalex.org/C2778496695","wikidata":"https://www.wikidata.org/wiki/Q254128","display_name":"Dilemma","level":2,"score":0.5752999782562256},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5221999883651733},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.45910000801086426},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.444599986076355},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.3612000048160553},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34470000863075256},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.33959999680519104},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.32089999318122864},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.2824000120162964},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27410000562667847},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.26089999079704285},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.2556999921798706},{"id":"https://openalex.org/C2781415417","wikidata":"https://www.wikidata.org/wiki/Q25304581","display_name":"Incomplete contracts","level":3,"score":0.2551000118255615}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2508.01287","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.01287","pdf_url":"https://arxiv.org/pdf/2508.01287","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2508.01287","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.01287","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2508.01287","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.01287","pdf_url":"https://arxiv.org/pdf/2508.01287","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Ensuring":[0],"sufficient":[1,108],"exploration":[2,114,159,203],"is":[3,168],"a":[4,49,105,139,143,217],"central":[5],"challenge":[6],"when":[7,132],"training":[8],"meta-reinforcement":[9],"learning":[10,101],"(meta-RL)":[11],"agents":[12],"to":[13,19,34,47,78,88,115,190],"solve":[14],"novel":[15],"environments.":[16],"Conventional":[17],"solutions":[18],"the":[20,69,86,110,191,200],"exploration-exploitation":[21],"dilemma":[22],"inject":[23],"explicit":[24],"incentives":[25],"such":[26],"as":[27,210],"randomization,":[28],"uncertainty":[29],"bonuses,":[30],"or":[31,165],"intrinsic":[32],"rewards":[33],"encourage":[35],"exploration.":[36],"In":[37],"this":[38],"work,":[39],"we":[40,129,188],"hypothesize":[41],"that":[42,74,157],"an":[43],"agent":[44,87,166],"trained":[45,141],"solely":[46],"maximize":[48],"greedy":[50,145],"(exploitation-only)":[51],"objective":[52,146],"can":[53,214],"nonetheless":[54],"exhibit":[55],"emergent":[56,158,185],"exploratory":[57,149],"behavior,":[58],"provided":[59],"three":[60],"conditions":[61],"are":[62,137],"met:":[63],"(1)":[64],"Recurring":[65],"Environmental":[66],"Structure,":[67],"where":[68,100],"environment":[70],"features":[71],"repeatable":[72],"regularities":[73],"allow":[75],"past":[76],"experience":[77],"inform":[79,116],"future":[80],"choices;":[81],"(2)":[82],"Agent":[83],"Memory,":[84],"enabling":[85],"retain":[89],"and":[90,95,125,135,204],"utilize":[91],"historical":[92],"interaction":[93],"data;":[94],"(3)":[96],"Long-Horizon":[97],"Credit":[98],"Assignment,":[99],"propagates":[102],"returns":[103],"over":[104],"time":[106],"frame":[107],"for":[109],"delayed":[111],"benefits":[112],"of":[113],"current":[117],"decisions.":[118],"Through":[119],"experiments":[120],"in":[121],"stochastic":[122],"multi-armed":[123],"bandits":[124],"temporally":[126],"extended":[127],"gridworlds,":[128],"observe":[130],"that,":[131,198],"both":[133],"structure":[134,164],"memory":[136,167],"present,":[138],"policy":[140],"on":[142],"strictly":[144],"exhibits":[147],"information-seeking":[148],"behavior.":[150],"We":[151],"further":[152],"demonstrate,":[153],"through":[154],"controlled":[155],"ablations,":[156],"vanishes":[160],"if":[161],"either":[162],"environmental":[163],"absent":[169],"(Conditions":[170],"1":[171],"&amp;":[172],"2).":[173],"Surprisingly,":[174],"removing":[175],"long-horizon":[176],"credit":[177],"assignment":[178],"(Condition":[179],"3)":[180],"does":[181],"not":[182,207],"always":[183],"prevent":[184],"exploration-a":[186],"result":[187],"attribute":[189],"pseudo-Thompson":[192],"Sampling":[193],"effect.":[194],"These":[195],"findings":[196],"suggest":[197],"under":[199],"right":[201],"prerequisites,":[202],"exploitation":[205],"need":[206],"be":[208],"treated":[209],"orthogonal":[211],"objectives":[212],"but":[213],"emerge":[215],"from":[216],"unified":[218],"reward-maximization":[219],"process.":[220]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
