{"id":"https://openalex.org/W4406121228","doi":"https://doi.org/10.1007/s11633-023-1482-0","title":"Latent Landmark Graph for Efficient Exploration-exploitation Balance in Hierarchical Reinforcement Learning","display_name":"Latent Landmark Graph for Efficient Exploration-exploitation Balance in Hierarchical Reinforcement Learning","publication_year":2025,"publication_date":"2025-01-07","ids":{"openalex":"https://openalex.org/W4406121228","doi":"https://doi.org/10.1007/s11633-023-1482-0"},"language":"en","primary_location":{"id":"doi:10.1007/s11633-023-1482-0","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11633-023-1482-0","pdf_url":null,"source":{"id":"https://openalex.org/S4210224602","display_name":"Machine Intelligence Research","issn_l":"2731-538X","issn":["2731-538X","2731-5398"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Intelligence Research","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049583121","display_name":"Qingyang Zhang","orcid":"https://orcid.org/0000-0001-5387-9942"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingyang Zhang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","School of Future Technology, University of Chinese Academy of Sciences, Beijing, 100049, China"],"raw_orcid":"https://orcid.org/0000-0001-5387-9942","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Future Technology, University of Chinese Academy of Sciences, Beijing, 100049, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102732442","display_name":"Hongming Zhang","orcid":"https://orcid.org/0000-0003-4905-6569"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Hongming Zhang","raw_affiliation_strings":["Department of Computing Science, University of Alberta, Edmonton, T6G 2E8, Canada"],"raw_orcid":"https://orcid.org/0000-0003-4905-6569","affiliations":[{"raw_affiliation_string":"Department of Computing Science, University of Alberta, Edmonton, T6G 2E8, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101684094","display_name":"Dengpeng Xing","orcid":"https://orcid.org/0000-0002-8251-9118"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dengpeng Xing","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, 100049, China"],"raw_orcid":"https://orcid.org/0000-0002-8251-9118","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, 100049, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108642431","display_name":"Bo Xu","orcid":"https://orcid.org/0000-0002-1111-1529"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bo Xu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, 100049, China","School of Future Technology, University of Chinese Academy of Sciences, Beijing, 100049, China"],"raw_orcid":"https://orcid.org/0000-0002-1111-1529","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, 100049, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"School of Future Technology, University of Chinese Academy of Sciences, Beijing, 100049, China","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5108642431"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":3.4624,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.91471654,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"22","issue":"2","first_page":"267","last_page":"288"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9778000116348267,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/landmark","display_name":"Landmark","score":0.9153745770454407},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6575003862380981},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5275696516036987},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5124831199645996},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5053896903991699},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4650973081588745},{"id":"https://openalex.org/keywords/balance","display_name":"Balance (ability)","score":0.456412136554718},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36419063806533813},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3257567882537842},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.27218207716941833},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.16445016860961914},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.12618908286094666},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.10480883717536926}],"concepts":[{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.9153745770454407},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6575003862380981},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5275696516036987},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5124831199645996},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5053896903991699},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4650973081588745},{"id":"https://openalex.org/C168031717","wikidata":"https://www.wikidata.org/wiki/Q1530280","display_name":"Balance (ability)","level":2,"score":0.456412136554718},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36419063806533813},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3257567882537842},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.27218207716941833},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.16445016860961914},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.12618908286094666},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.10480883717536926}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11633-023-1482-0","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11633-023-1482-0","pdf_url":null,"source":{"id":"https://openalex.org/S4210224602","display_name":"Machine Intelligence Research","issn_l":"2731-538X","issn":["2731-538X","2731-5398"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Intelligence Research","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5199999809265137,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":67,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W1970139083","https://openalex.org/W2012833704","https://openalex.org/W2096733369","https://openalex.org/W2109910161","https://openalex.org/W2145267250","https://openalex.org/W2146444479","https://openalex.org/W2158782408","https://openalex.org/W2322143332","https://openalex.org/W2335959470","https://openalex.org/W2440926996","https://openalex.org/W2561776174","https://openalex.org/W2563408008","https://openalex.org/W2594829461","https://openalex.org/W2602856279","https://openalex.org/W2624503621","https://openalex.org/W2781726626","https://openalex.org/W2786917922","https://openalex.org/W2787757704","https://openalex.org/W2803281228","https://openalex.org/W2811111819","https://openalex.org/W2900928600","https://openalex.org/W2908675041","https://openalex.org/W2911495555","https://openalex.org/W2912083425","https://openalex.org/W2912269676","https://openalex.org/W2913954081","https://openalex.org/W2914304175","https://openalex.org/W2922388521","https://openalex.org/W2950885698","https://openalex.org/W2963761387","https://openalex.org/W2966477753","https://openalex.org/W2967293465","https://openalex.org/W2971865858","https://openalex.org/W2973223114","https://openalex.org/W2976657239","https://openalex.org/W2979216345","https://openalex.org/W2997289589","https://openalex.org/W3036125317","https://openalex.org/W3038282378","https://openalex.org/W3096914115","https://openalex.org/W3100226171","https://openalex.org/W3102554291","https://openalex.org/W3108819287","https://openalex.org/W3109943994","https://openalex.org/W3205041129","https://openalex.org/W3208334305","https://openalex.org/W4211171425","https://openalex.org/W4281550413","https://openalex.org/W4287162146","https://openalex.org/W4288325380","https://openalex.org/W4289281722","https://openalex.org/W4297809649","https://openalex.org/W4308075625","https://openalex.org/W4319659130","https://openalex.org/W4382239700","https://openalex.org/W4382449266","https://openalex.org/W4385488604","https://openalex.org/W4394649195","https://openalex.org/W6616173779","https://openalex.org/W6640963894","https://openalex.org/W6683821272","https://openalex.org/W6740801417","https://openalex.org/W6759871227","https://openalex.org/W6767649332","https://openalex.org/W6774915356","https://openalex.org/W6790486821"],"related_works":["https://openalex.org/W2056853153","https://openalex.org/W2057559274","https://openalex.org/W2026924879","https://openalex.org/W2005087563","https://openalex.org/W2378111931","https://openalex.org/W2052388267","https://openalex.org/W2950647290","https://openalex.org/W2620829895","https://openalex.org/W2356918560","https://openalex.org/W4243161226"],"abstract_inverted_index":null,"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-10-10T00:00:00"}
