{"id":"https://openalex.org/W7157312094","doi":"https://doi.org/10.48550/arxiv.2604.24558","title":"Hierarchical Behaviour Spaces","display_name":"Hierarchical Behaviour Spaces","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7157312094","doi":"https://doi.org/10.48550/arxiv.2604.24558"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24558","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24558","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24558","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113174782","display_name":"Michael Matthews","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthews, Michael Tryfan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048689678","display_name":"Anssi Kanervisto","orcid":"https://orcid.org/0000-0002-7479-4574"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kanervisto, Anssi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134802972","display_name":"Jakob Foerster","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Foerster, Jakob","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068006776","display_name":"Pierluca D\u2019Oro","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"D'Oro, Pierluca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134758779","display_name":"Scott Fujimoto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fujimoto, Scott","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5045766292","display_name":"Mikael Henaff","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Henaff, Mikael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8571000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8571000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.04320000112056732,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.010099999606609344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.754800021648407},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7123000025749207},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6399999856948853},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5561000108718872},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5236999988555908},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.501800000667572},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.3474999964237213}],"concepts":[{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.754800021648407},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7123000025749207},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6399999856948853},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5561000108718872},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5530999898910522},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5236999988555908},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.501800000667572},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4722999930381775},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3474999964237213},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.3458000123500824},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31940001249313354},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C142730499","wikidata":"https://www.wikidata.org/wiki/Q934367","display_name":"Function space","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C143724316","wikidata":"https://www.wikidata.org/wiki/Q312468","display_name":"Series (stratigraphy)","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2628999948501587},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2583000063896179},{"id":"https://openalex.org/C124527596","wikidata":"https://www.wikidata.org/wiki/Q17029359","display_name":"Hierarchical control system","level":3,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24558","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24558","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24558","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24558","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"work":[1],"in":[2,9,107],"hierarchical":[3],"reinforcement":[4],"learning":[5,16],"has":[6],"shown":[7],"success":[8],"scaling":[10],"to":[11,44,67],"billions":[12],"of":[13,20,29,48,65,93,105],"timesteps":[14],"when":[15],"over":[17,57],"a":[18,31,46,61,91],"set":[19,64],"predefined":[21],"option":[22],"reward":[23,33,38,58],"functions.":[24],"We":[25,70,78,89],"show":[26],"that,":[27,97],"instead":[28],"using":[30],"single":[32],"function":[34],"per":[35],"option,":[36],"the":[37,52,82,103],"functions":[39],"can":[40],"be":[41,68],"effectively":[42],"used":[43],"induce":[45],"space":[47],"behaviours,":[49],"by":[50],"letting":[51],"controller":[53],"specify":[54],"linear":[55],"combinations":[56],"functions,":[59],"allowing":[60],"more":[62],"expressive":[63],"policies":[66],"represented.":[69],"call":[71],"this":[72],"method":[73,109],"Hierarchical":[74],"Behaviour":[75],"Spaces":[76],"(HBS).":[77],"evaluate":[79],"HBS":[80],"on":[81],"NetHack":[83],"Learning":[84],"Environment,":[85],"demonstrating":[86],"strong":[87],"performance.":[88],"conduct":[90],"series":[92],"experiments":[94],"and":[95],"determine":[96],"perhaps":[98],"going":[99],"against":[100],"conventional":[101],"wisdom,":[102],"benefits":[104],"hierarchy":[106],"our":[108],"come":[110],"from":[111],"increased":[112],"exploration":[113],"rather":[114],"than":[115],"long":[116],"term":[117],"reasoning.":[118]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-29T00:00:00"}
