{"id":"https://openalex.org/W7154251313","doi":"https://doi.org/10.48550/arxiv.2604.10812","title":"PokeRL: Reinforcement Learning for Pokemon Red","display_name":"PokeRL: Reinforcement Learning for Pokemon Red","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W7154251313","doi":"https://doi.org/10.48550/arxiv.2604.10812"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10812","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10812","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10812","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133607116","display_name":"Dheeraj Mudireddy","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mudireddy, Dheeraj","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133618314","display_name":"Sai Patibandla","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patibandla, Sai","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5133607116"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.5634999871253967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.5634999871253967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.35850000381469727,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.00930000003427267,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8159999847412109},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5703999996185303},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5440000295639038},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.49720001220703125},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4016999900341034},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3862999975681305},{"id":"https://openalex.org/keywords/champion","display_name":"Champion","score":0.36399999260902405}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8159999847412109},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6565999984741211},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5703999996185303},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5440000295639038},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.49720001220703125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44530001282691956},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4016999900341034},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3862999975681305},{"id":"https://openalex.org/C2780465443","wikidata":"https://www.wikidata.org/wiki/Q852061","display_name":"Champion","level":2,"score":0.36399999260902405},{"id":"https://openalex.org/C207456731","wikidata":"https://www.wikidata.org/wiki/Q660818","display_name":"League","level":2,"score":0.336899995803833},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C147494362","wikidata":"https://www.wikidata.org/wiki/Q2078905","display_name":"Troubleshooting","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10812","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10812","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10812","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10812","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Pokemon":[0,83,157],"Red":[1],"is":[2,162],"a":[3,18,68,107,118,125,148],"long-horizon":[4],"JRPG":[5],"with":[6,50,115],"sparse":[7],"rewards,":[8],"partial":[9],"observability,":[10],"and":[11,42,97,121,124,145,155],"quirky":[12],"control":[13],"mechanics":[14],"that":[15,29,71,132],"make":[16],"it":[17],"challenging":[19],"benchmark":[20],"for":[21],"reinforcement":[22,74],"learning.":[23],"While":[24],"recent":[25],"work":[26],"has":[27],"shown":[28],"PPO":[30],"agents":[31,51,76],"can":[32],"clear":[33],"the":[34,87,99,112],"first":[35,100],"two":[36],"gyms":[37],"using":[38],"heavy":[39],"reward":[40,128],"shaping":[41],"engineered":[43],"observations,":[44],"training":[45],"remains":[46],"brittle":[47],"in":[48,82],"practice,":[49],"often":[52],"degenerating":[53],"into":[54],"action":[55],"loops,":[56],"menu":[57],"spam,":[58,146],"or":[59],"unproductive":[60],"wandering.":[61],"In":[62],"this":[63],"paper,":[64],"we":[65],"present":[66],"PokeRL,":[67,136],"modular":[69],"system":[70],"trains":[72],"deep":[73],"learning":[75],"to":[77,93],"complete":[78],"early":[79],"game":[80],"tasks":[81],"Red,":[84],"including":[85],"exiting":[86],"player's":[88],"house,":[89],"exploring":[90],"Pallet":[91],"Town":[92],"reach":[94],"tall":[95],"grass,":[96],"winning":[98],"rival":[101],"battle.":[102],"Our":[103],"main":[104],"contributions":[105],"are":[106,147],"loop-aware":[108],"environment":[109],"wrapper":[110],"around":[111],"PyBoy":[113],"emulator":[114],"map":[116],"masking,":[117],"multi-layer":[119],"anti-loop":[120],"anti-spam":[122],"mechanism,":[123],"dense":[126],"hierarchical":[127],"design.":[129],"We":[130],"argue":[131],"practical":[133],"systems":[134],"like":[135],"which":[137],"explicitly":[138],"model":[139],"failure":[140],"modes":[141],"such":[142],"as":[143],"loops":[144],"necessary":[149],"intermediate":[150],"step":[151],"between":[152],"toy":[153],"benchmarks":[154],"full":[156],"League":[158],"champion":[159],"agents.":[160],"Code":[161],"available":[163],"at":[164],"https://github.com/reddheeraj/PokemonRL":[165]},"counts_by_year":[],"updated_date":"2026-04-15T06:04:33.058270","created_date":"2026-04-15T00:00:00"}
