{"id":"https://openalex.org/W4385488604","doi":"https://doi.org/10.1109/ijcnn54540.2023.10190993","title":"Balancing Exploration and Exploitation in Hierarchical Reinforcement Learning via Latent Landmark Graphs","display_name":"Balancing Exploration and Exploitation in Hierarchical Reinforcement Learning via Latent Landmark Graphs","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385488604","doi":"https://doi.org/10.1109/ijcnn54540.2023.10190993"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn54540.2023.10190993","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10190993","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102868080","display_name":"Qingyang Zhang","orcid":"https://orcid.org/0000-0003-3362-6416"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingyang Zhang","raw_affiliation_strings":["Institute of Automation,Chinese Academy of Sciences,Beijing,China","School of Future Technology, University of Chinese Academy of Sciences, Beijing, China","Chinese Academy of Sciences, Institute of Automation, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Automation,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Future Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041434096","display_name":"Yiming Yang","orcid":"https://orcid.org/0000-0003-1359-0364"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiming Yang","raw_affiliation_strings":["Institute of Automation,Chinese Academy of Sciences,Beijing,China","Chinese Academy of Sciences, Institute of Automation, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Automation,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076546255","display_name":"Jingqing Ruan","orcid":"https://orcid.org/0000-0002-4857-9053"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingqing Ruan","raw_affiliation_strings":["Institute of Automation,Chinese Academy of Sciences,Beijing,China","Chinese Academy of Sciences, Institute of Automation, Beijing, China","School of Future Technology, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Automation,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Future Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012772278","display_name":"Xuantang Xiong","orcid":"https://orcid.org/0000-0001-8824-1324"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuantang Xiong","raw_affiliation_strings":["Institute of Automation,Chinese Academy of Sciences,Beijing,China","Chinese Academy of Sciences, Institute of Automation, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Automation,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101684094","display_name":"Dengpeng Xing","orcid":"https://orcid.org/0000-0002-8251-9118"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dengpeng Xing","raw_affiliation_strings":["Institute of Automation,Chinese Academy of Sciences,Beijing,China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","Chinese Academy of Sciences, Institute of Automation, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Automation,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108642431","display_name":"Bo Xu","orcid":"https://orcid.org/0000-0002-1111-1529"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Xu","raw_affiliation_strings":["Institute of Automation,Chinese Academy of Sciences,Beijing,China","School of Future Technology, University of Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","Chinese Academy of Sciences, Institute of Automation, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Automation,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Future Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.979,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.80397486,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9663000106811523,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.929099977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8410944938659668},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7435612082481384},{"id":"https://openalex.org/keywords/landmark","display_name":"Landmark","score":0.733089029788971},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6170536875724792},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6004530787467957},{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.5261744260787964},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.5241121053695679},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4865044951438904},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.42241552472114563},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4105859398841858},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10032191872596741}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8410944938659668},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7435612082481384},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.733089029788971},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6170536875724792},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6004530787467957},{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.5261744260787964},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.5241121053695679},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4865044951438904},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.42241552472114563},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4105859398841858},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10032191872596741},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn54540.2023.10190993","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10190993","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W567721252","https://openalex.org/W1592847719","https://openalex.org/W1970139083","https://openalex.org/W2012833704","https://openalex.org/W2073459066","https://openalex.org/W2109910161","https://openalex.org/W2146444479","https://openalex.org/W2158782408","https://openalex.org/W2160371091","https://openalex.org/W2781726626","https://openalex.org/W2811111819","https://openalex.org/W2920215304","https://openalex.org/W2922388521","https://openalex.org/W2949267040","https://openalex.org/W2949475445","https://openalex.org/W2950614095","https://openalex.org/W2963262099","https://openalex.org/W2963321092","https://openalex.org/W2963761387","https://openalex.org/W2976657239","https://openalex.org/W2997289589","https://openalex.org/W3010717779","https://openalex.org/W3036125317","https://openalex.org/W3096807772","https://openalex.org/W3109943994","https://openalex.org/W3128416529","https://openalex.org/W4281550413","https://openalex.org/W4287162146","https://openalex.org/W4288109092","https://openalex.org/W4288331462","https://openalex.org/W4297809330","https://openalex.org/W4297809649","https://openalex.org/W4300799055","https://openalex.org/W4394649195","https://openalex.org/W6616173779","https://openalex.org/W6683821272","https://openalex.org/W6703271639","https://openalex.org/W6730641667","https://openalex.org/W6734215269","https://openalex.org/W6740801417","https://openalex.org/W6747473740","https://openalex.org/W6752089545","https://openalex.org/W6752983716","https://openalex.org/W6756293349","https://openalex.org/W6759871227","https://openalex.org/W6764173040","https://openalex.org/W6774915356","https://openalex.org/W6779715229","https://openalex.org/W6786644593","https://openalex.org/W6790486821","https://openalex.org/W6840258811","https://openalex.org/W6844225899"],"related_works":["https://openalex.org/W2056853153","https://openalex.org/W2057559274","https://openalex.org/W2005087563","https://openalex.org/W2378111931","https://openalex.org/W4243161226","https://openalex.org/W2950647290","https://openalex.org/W2620829895","https://openalex.org/W2356918560","https://openalex.org/W1968481813","https://openalex.org/W2392886708"],"abstract_inverted_index":{"Goal-Conditioned":[0],"Hierarchical":[1],"Reinforcement":[2],"Learning":[3],"(GCHRL)":[4],"is":[5,176],"a":[6,108,125,131,139],"promising":[7],"paradigm":[8],"to":[9,94],"address":[10],"the":[11,19,33,57],"exploration-exploitation":[12],"dilemma":[13],"in":[14,32,60,168],"reinforcement":[15,85],"learning.":[16],"It":[17],"decomposes":[18],"source":[20],"task":[21],"into":[22],"sub":[23,43,48,65,72],"goal":[24,44,49,66,73],"conditional":[25],"subtasks":[26],"and":[27,30,47,68,79,123,130,146,171],"conducts":[28],"exploration":[29,78,145],"exploitation":[31,147],"subgoal":[34,101,140],"space.":[35],"The":[36],"effectiveness":[37],"of":[38],"GCHRL":[39,61],"heavily":[40],"relies":[41],"on":[42,114,128,134,161],"representation":[45,110],"functions":[46],"selection":[50,74,141],"strategy.":[51],"However,":[52],"existing":[53],"works":[54],"often":[55],"overlook":[56],"temporal":[58,105],"coherence":[59,106],"when":[62],"learning":[63,86,111],"latent":[64,100,120],"representations":[67,102],"lack":[69],"an":[70],"efficient":[71],"strategy":[75,142],"that":[76,103,143,156],"balances":[77,144],"exploitation.":[80],"This":[81],"paper":[82],"proposes":[83],"HIerarchical":[84],"via":[87],"dynamically":[88,118],"building":[89],"Latent":[90],"Landmark":[91],"graphs":[92,122],"(HILL)":[93],"overcome":[95],"these":[96,115],"limitations.":[97],"HILL":[98,117,137,157],"learns":[99],"satisfy":[104],"using":[107],"contrastive":[109],"objective.":[112],"Based":[113],"representations,":[116],"builds":[119],"landmark":[121],"employs":[124],"novelty":[126],"measure":[127,133],"nodes":[129],"utility":[132],"edges.":[135],"Finally,":[136],"develops":[138],"by":[148],"jointly":[149],"considering":[150],"both":[151],"measures.":[152],"Experimental":[153],"results":[154],"demonstrate":[155],"outperforms":[158],"state-of-the-art":[159],"baselines":[160],"continuous":[162],"control":[163],"tasks":[164],"with":[165],"sparse":[166],"rewards":[167],"sample":[169],"efficiency":[170],"asymptotic":[172],"performance.":[173],"Our":[174],"code":[175],"available":[177],"at":[178],"https://github.com/papercode2022/HILL.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
