{"id":"https://openalex.org/W4416246795","doi":"https://doi.org/10.48550/arxiv.2510.19372","title":"On the Hardness of Reinforcement Learning with Transition Look-Ahead","display_name":"On the Hardness of Reinforcement Learning with Transition Look-Ahead","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W4416246795","doi":"https://doi.org/10.48550/arxiv.2510.19372"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2510.19372","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.19372","pdf_url":"https://arxiv.org/pdf/2510.19372","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.19372","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114332069","display_name":"Corentin Pla","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Pla, Corentin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120650850","display_name":"Hugo Richard","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Richard, Hugo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027578003","display_name":"Marc Abeille","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abeille, Marc","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093793119","display_name":"Nadav Merlis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Merlis, Nadav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5067550667","display_name":"Vianney Perchet","orcid":"https://orcid.org/0000-0002-9333-264X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Perchet, Vianney","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5114332069"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.843999981880188,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.843999981880188,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.07500000298023224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.026399999856948853,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7224000096321106},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6653000116348267},{"id":"https://openalex.org/keywords/linear-programming","display_name":"Linear programming","score":0.42329999804496765},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.4122999906539917},{"id":"https://openalex.org/keywords/polynomial","display_name":"Polynomial","score":0.4092999994754791},{"id":"https://openalex.org/keywords/boundary","display_name":"Boundary (topology)","score":0.4027000069618225},{"id":"https://openalex.org/keywords/time-complexity","display_name":"Time complexity","score":0.3774000108242035}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7224000096321106},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6653000116348267},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.536899983882904},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5254999995231628},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.42329999804496765},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.4122999906539917},{"id":"https://openalex.org/C90119067","wikidata":"https://www.wikidata.org/wiki/Q43260","display_name":"Polynomial","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.40369999408721924},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.4027000069618225},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37790000438690186},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.3393999934196472},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3140999972820282},{"id":"https://openalex.org/C194232998","wikidata":"https://www.wikidata.org/wiki/Q1606712","display_name":"Transition (genetics)","level":3,"score":0.29600000381469727},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.2734000086784363},{"id":"https://openalex.org/C3020402766","wikidata":"https://www.wikidata.org/wiki/Q104376712","display_name":"Prior information","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25440001487731934}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2510.19372","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.19372","pdf_url":"https://arxiv.org/pdf/2510.19372","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:HAL:hal-05476425v1","is_oa":false,"landing_page_url":"https://hal.science/hal-05476425","pdf_url":null,"source":{"id":"https://openalex.org/S4406922466","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://proceedings.mlr.press/v238/","raw_type":"Journal articles"},{"id":"doi:10.48550/arxiv.2510.19372","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.19372","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.19372","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.19372","pdf_url":"https://arxiv.org/pdf/2510.19372","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416246795.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"study":[1],"reinforcement":[2,107],"learning":[3],"(RL)":[4],"with":[5,61,103],"transition":[6,104],"look-ahead,":[7],"where":[8],"the":[9,38,83,99],"agent":[10],"may":[11],"observe":[12],"which":[13],"states":[14],"would":[15],"be":[16,66],"visited":[17],"upon":[18],"playing":[19],"any":[20],"sequence":[21],"of":[22,29,101],"$\\ell$":[23],"actions":[24],"before":[25],"deciding":[26],"its":[27],"course":[28],"action.":[30],"While":[31],"such":[32],"predictive":[33],"information":[34,46],"can":[35,65],"drastically":[36],"improve":[37],"achievable":[39],"performance,":[40],"we":[41,56],"show":[42],"that":[43,58],"using":[44],"this":[45],"optimally":[47],"comes":[48],"at":[49],"a":[50,72,90],"potentially":[51],"prohibitive":[52],"computational":[53],"cost.":[54],"Specifically,":[55],"prove":[57],"optimal":[59],"planning":[60,102],"one-step":[62],"look-ahead":[63,105],"($\\ell=1$)":[64],"solved":[67],"in":[68,106],"polynomial":[69],"time":[70],"through":[71],"novel":[73],"linear":[74],"programming":[75],"formulation.":[76],"In":[77],"contrast,":[78],"for":[79,98],"$\\ell":[80],"\\geq":[81],"2$,":[82],"problem":[84,100],"becomes":[85],"NP-hard.":[86],"Our":[87],"results":[88],"delineate":[89],"precise":[91],"boundary":[92],"between":[93],"tractable":[94],"and":[95],"intractable":[96],"cases":[97],"learning.":[108]},"counts_by_year":[],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-24T00:00:00"}
