{"id":"https://openalex.org/W3174995978","doi":"https://doi.org/10.1007/s10458-021-09518-6","title":"Exploration in policy optimization through multiple paths","display_name":"Exploration in policy optimization through multiple paths","publication_year":2021,"publication_date":"2021-06-26","ids":{"openalex":"https://openalex.org/W3174995978","doi":"https://doi.org/10.1007/s10458-021-09518-6","mag":"3174995978"},"language":"en","primary_location":{"id":"doi:10.1007/s10458-021-09518-6","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s10458-021-09518-6","pdf_url":null,"source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090888372","display_name":"Ling Pan","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ling Pan","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016589992","display_name":"Qingpeng Cai","orcid":"https://orcid.org/0000-0001-6451-9299"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingpeng Cai","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082905458","display_name":"Longbo Huang","orcid":"https://orcid.org/0000-0002-7341-447X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longbo Huang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5090888372"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":null,"fwci":0.2799,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62981531,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"35","issue":"2","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.8060221076011658},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.787544846534729},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7031254172325134},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5781909227371216},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5456985235214233},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5411874055862427},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.4733350872993469},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.4689599573612213},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3556813597679138},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3163226842880249},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.21223491430282593},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16129612922668457}],"concepts":[{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.8060221076011658},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.787544846534729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7031254172325134},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5781909227371216},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5456985235214233},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5411874055862427},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.4733350872993469},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.4689599573612213},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3556813597679138},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3163226842880249},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.21223491430282593},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16129612922668457},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10458-021-09518-6","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s10458-021-09518-6","pdf_url":null,"source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-133975","is_oa":false,"landing_page_url":"http://www.scopus.com/record/display.url?eid=2-s2.0-85184973251&origin=inward","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.49000000953674316}],"awards":[{"id":"https://openalex.org/G72957147","display_name":null,"funder_award_id":"61672316","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W1608057971","https://openalex.org/W1639032689","https://openalex.org/W2001947543","https://openalex.org/W2158782408","https://openalex.org/W2165150801","https://openalex.org/W2168405694","https://openalex.org/W2173248099","https://openalex.org/W2260756217","https://openalex.org/W2417786368","https://openalex.org/W2419612459","https://openalex.org/W2554984891","https://openalex.org/W2561776174","https://openalex.org/W2594103415","https://openalex.org/W2596982695","https://openalex.org/W2604373826","https://openalex.org/W2614839826","https://openalex.org/W2620671107","https://openalex.org/W2727576081","https://openalex.org/W2778749116","https://openalex.org/W2781726626","https://openalex.org/W2785542505","https://openalex.org/W2786036274","https://openalex.org/W2786552986","https://openalex.org/W2787800669","https://openalex.org/W2787938642","https://openalex.org/W2794487566","https://openalex.org/W2810754397","https://openalex.org/W2889893643","https://openalex.org/W2894662639","https://openalex.org/W2898585845","https://openalex.org/W2905016180","https://openalex.org/W2943810245","https://openalex.org/W2962777832","https://openalex.org/W2962839548","https://openalex.org/W2963024489","https://openalex.org/W2963160877","https://openalex.org/W2963184621","https://openalex.org/W2963430540","https://openalex.org/W2963523627","https://openalex.org/W2963849886","https://openalex.org/W2963900541","https://openalex.org/W2966128956","https://openalex.org/W3037462640","https://openalex.org/W4231104606","https://openalex.org/W4233471163","https://openalex.org/W6638018090","https://openalex.org/W6684205842","https://openalex.org/W6695011786","https://openalex.org/W6739193204","https://openalex.org/W6740092555","https://openalex.org/W6744123322","https://openalex.org/W6750138292","https://openalex.org/W6751285671","https://openalex.org/W6751824613","https://openalex.org/W6751869817","https://openalex.org/W6757444583","https://openalex.org/W6770061890"],"related_works":["https://openalex.org/W2381242807","https://openalex.org/W3126131230","https://openalex.org/W2347541121","https://openalex.org/W4288804799","https://openalex.org/W2080951048","https://openalex.org/W3089617106","https://openalex.org/W3032237421","https://openalex.org/W3011883280","https://openalex.org/W2390346111","https://openalex.org/W2369082698"],"abstract_inverted_index":null,"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
