{"id":"https://openalex.org/W2948198678","doi":"https://doi.org/10.1109/iros45743.2020.9340891","title":"Hypothesis-Driven Skill Discovery for Hierarchical Deep Reinforcement Learning","display_name":"Hypothesis-Driven Skill Discovery for Hierarchical Deep Reinforcement Learning","publication_year":2020,"publication_date":"2020-10-24","ids":{"openalex":"https://openalex.org/W2948198678","doi":"https://doi.org/10.1109/iros45743.2020.9340891","mag":"2948198678"},"language":"en","primary_location":{"id":"doi:10.1109/iros45743.2020.9340891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros45743.2020.9340891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1906.01408","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003583202","display_name":"Caleb Chuck","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Caleb Chuck","raw_affiliation_strings":["The University of Texas at Austin Personal Robotics and Automation Lab","University of Texas at Austin"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin Personal Robotics and Automation Lab","institution_ids":[]},{"raw_affiliation_string":"University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039513625","display_name":"Supawit Chockchowwat","orcid":"https://orcid.org/0000-0003-2881-8501"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Supawit Chockchowwat","raw_affiliation_strings":["The University of Texas at Austin Personal Robotics and Automation Lab"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin Personal Robotics and Automation Lab","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043572737","display_name":"Scott Niekum","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Scott Niekum","raw_affiliation_strings":["The University of Texas at Austin Personal Robotics and Automation Lab"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin Personal Robotics and Automation Lab","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5003583202"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00578524,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"70","issue":null,"first_page":"5572","last_page":"5579"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8711562156677246},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7256989479064941},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6487206220626831},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6090494394302368},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.5545969605445862},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5491646528244019},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5266561508178711},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.49851202964782715},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4837091565132141},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4826655685901642},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4806765019893646},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10574594140052795}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8711562156677246},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7256989479064941},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6487206220626831},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6090494394302368},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.5545969605445862},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5491646528244019},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5266561508178711},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.49851202964782715},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4837091565132141},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4826655685901642},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4806765019893646},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10574594140052795},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/iros45743.2020.9340891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros45743.2020.9340891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1906.01408","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1906.01408","pdf_url":"https://arxiv.org/pdf/1906.01408","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2948198678","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1906.01408","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1906.01408","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1906.01408","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1906.01408","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1906.01408","pdf_url":"https://arxiv.org/pdf/1906.01408","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2948198678.pdf","grobid_xml":"https://content.openalex.org/works/W2948198678.grobid-xml"},"referenced_works_count":86,"referenced_works":["https://openalex.org/W1512866498","https://openalex.org/W1529739011","https://openalex.org/W2079501320","https://openalex.org/W2101355568","https://openalex.org/W2101677491","https://openalex.org/W2108535023","https://openalex.org/W2109910161","https://openalex.org/W2111967991","https://openalex.org/W2132676037","https://openalex.org/W2143891888","https://openalex.org/W2168640731","https://openalex.org/W2344556769","https://openalex.org/W2521274174","https://openalex.org/W2561776174","https://openalex.org/W2594829461","https://openalex.org/W2596367596","https://openalex.org/W2736601468","https://openalex.org/W2737215407","https://openalex.org/W2738669288","https://openalex.org/W2754596546","https://openalex.org/W2761873684","https://openalex.org/W2770604561","https://openalex.org/W2805516822","https://openalex.org/W2807340089","https://openalex.org/W2885550588","https://openalex.org/W2886885155","https://openalex.org/W2895453875","https://openalex.org/W2899205164","https://openalex.org/W2900677074","https://openalex.org/W2914261249","https://openalex.org/W2920362155","https://openalex.org/W2955035422","https://openalex.org/W2962717849","https://openalex.org/W2962764591","https://openalex.org/W2962872206","https://openalex.org/W2963058385","https://openalex.org/W2963126744","https://openalex.org/W2963160877","https://openalex.org/W2963262099","https://openalex.org/W2963430173","https://openalex.org/W2963523627","https://openalex.org/W2963604043","https://openalex.org/W2964001908","https://openalex.org/W2964043796","https://openalex.org/W2964223703","https://openalex.org/W2964227312","https://openalex.org/W2964291307","https://openalex.org/W2964335642","https://openalex.org/W3003422588","https://openalex.org/W3150970803","https://openalex.org/W4248679889","https://openalex.org/W6623316541","https://openalex.org/W6630802251","https://openalex.org/W6640362335","https://openalex.org/W6676557315","https://openalex.org/W6676560026","https://openalex.org/W6686996450","https://openalex.org/W6692131809","https://openalex.org/W6692846177","https://openalex.org/W6703271639","https://openalex.org/W6704647235","https://openalex.org/W6718190810","https://openalex.org/W6730641667","https://openalex.org/W6731227521","https://openalex.org/W6734215269","https://openalex.org/W6735033012","https://openalex.org/W6735641298","https://openalex.org/W6739416393","https://openalex.org/W6740801417","https://openalex.org/W6741002519","https://openalex.org/W6741302124","https://openalex.org/W6746445604","https://openalex.org/W6751660332","https://openalex.org/W6751796012","https://openalex.org/W6752216966","https://openalex.org/W6753925943","https://openalex.org/W6754957883","https://openalex.org/W6755654293","https://openalex.org/W6755864697","https://openalex.org/W6756303580","https://openalex.org/W6756341295","https://openalex.org/W6758978475","https://openalex.org/W6760405395","https://openalex.org/W6764969207","https://openalex.org/W6771329031","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2521274174","https://openalex.org/W3105335800","https://openalex.org/W2981344907","https://openalex.org/W3171623979","https://openalex.org/W2950197980","https://openalex.org/W3203056473","https://openalex.org/W2996006200","https://openalex.org/W2428397683","https://openalex.org/W2978069508","https://openalex.org/W3093201525","https://openalex.org/W3084024636","https://openalex.org/W3091395917","https://openalex.org/W3115080916","https://openalex.org/W2162227979","https://openalex.org/W3040427614","https://openalex.org/W3103763075","https://openalex.org/W3021105371","https://openalex.org/W3027456239","https://openalex.org/W2787329458","https://openalex.org/W3185189976"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,162],"learning":[2,7,55,131,163],"(DRL)":[3],"is":[4],"capable":[5],"of":[6,13,69,103,113,129,156],"high-performing":[8],"policies":[9,153],"on":[10],"a":[11,47,111,136,142],"variety":[12],"complex":[14],"high-dimensional":[15],"tasks,":[16],"ranging":[17],"from":[18,30,61,94],"video":[19],"games":[20],"to":[21,38,50,115],"robotic":[22,138],"manipulation.":[23],"However,":[24],"standard":[25],"DRL":[26],"methods":[27],"often":[28],"suffer":[29],"poor":[31],"sample":[32,59,127],"efficiency,":[33],"partially":[34],"because":[35],"they":[36],"aim":[37],"be":[39],"entirely":[40],"problem-agnostic.":[41],"In":[42,147],"this":[43],"work,":[44],"we":[45,82],"introduce":[46],"novel":[48],"approach":[49],"exploration":[51],"and":[52,76,87,109,141],"hierarchical":[53],"skill":[54],"that":[56,121],"derives":[57],"its":[58],"efficiency":[60,128],"intuitive":[62],"assumptions":[63],"it":[64],"makes":[65],"about":[66,100],"the":[67,73,84,101,126],"behavior":[68],"objects":[70,93],"both":[71],"in":[72,106,132],"physical":[74],"world":[75],"simulations":[77],"which":[78,91],"mimic":[79],"physics.":[80],"Specifically,":[81],"propose":[83],"Hypothesis":[85],"Proposal":[86],"Evaluation":[88],"(HyPE)":[89],"algorithm,":[90],"discovers":[92],"raw":[95],"pixel":[96],"data,":[97],"generates":[98],"hypotheses":[99],"controllability":[102],"observed":[104],"changes":[105],"object":[107],"state,":[108],"learns":[110,151],"hierarchy":[112],"skills":[114],"test":[116],"these":[117,148],"hypotheses.":[118],"We":[119],"demonstrate":[120],"HyPE":[122,150],"can":[123],"dramatically":[124],"improve":[125],"policy":[130],"two":[133],"different":[134],"domains:":[135],"simulated":[137],"blockpushing":[139],"domain,":[140],"popular":[143],"benchmark":[144],"task:":[145],"Breakout.":[146],"domains,":[149],"high-scoring":[152],"an":[154],"order":[155],"magnitude":[157],"faster":[158],"than":[159],"several":[160],"state-of-the-art":[161],"methods.":[164]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
