{"id":"https://openalex.org/W4391020496","doi":"https://doi.org/10.1109/cdc49753.2023.10383956","title":"Computationally Efficient Reinforcement Learning: Targeted Exploration leveraging Simple Rules","display_name":"Computationally Efficient Reinforcement Learning: Targeted Exploration leveraging Simple Rules","publication_year":2023,"publication_date":"2023-12-13","ids":{"openalex":"https://openalex.org/W4391020496","doi":"https://doi.org/10.1109/cdc49753.2023.10383956"},"language":"en","primary_location":{"id":"doi:10.1109/cdc49753.2023.10383956","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc49753.2023.10383956","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 62nd IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://infoscience.epfl.ch/handle/20.500.14299/203102","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056840318","display_name":"Loris Di Natale","orcid":"https://orcid.org/0000-0002-3295-412X"},"institutions":[{"id":"https://openalex.org/I71824836","display_name":"Swiss Federal Laboratories for Materials Science and Technology","ror":"https://ror.org/02x681a42","country_code":"CH","type":"facility","lineage":["https://openalex.org/I2799323385","https://openalex.org/I71824836"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Loris Di Natale","raw_affiliation_strings":["Urban Energy Systems Lab,Empa,D&#x00FC;bendorf,Switzerland"],"affiliations":[{"raw_affiliation_string":"Urban Energy Systems Lab,Empa,D&#x00FC;bendorf,Switzerland","institution_ids":["https://openalex.org/I71824836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005151144","display_name":"Bratislav Svetozarevic","orcid":"https://orcid.org/0000-0001-8921-3915"},"institutions":[{"id":"https://openalex.org/I71824836","display_name":"Swiss Federal Laboratories for Materials Science and Technology","ror":"https://ror.org/02x681a42","country_code":"CH","type":"facility","lineage":["https://openalex.org/I2799323385","https://openalex.org/I71824836"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Bratislav Svetozarevic","raw_affiliation_strings":["Urban Energy Systems Lab,Empa,D&#x00FC;bendorf,Switzerland"],"affiliations":[{"raw_affiliation_string":"Urban Energy Systems Lab,Empa,D&#x00FC;bendorf,Switzerland","institution_ids":["https://openalex.org/I71824836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069545422","display_name":"Philipp Heer","orcid":"https://orcid.org/0000-0003-2999-5753"},"institutions":[{"id":"https://openalex.org/I71824836","display_name":"Swiss Federal Laboratories for Materials Science and Technology","ror":"https://ror.org/02x681a42","country_code":"CH","type":"facility","lineage":["https://openalex.org/I2799323385","https://openalex.org/I71824836"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Philipp Heer","raw_affiliation_strings":["Urban Energy Systems Lab,Empa,D&#x00FC;bendorf,Switzerland"],"affiliations":[{"raw_affiliation_string":"Urban Energy Systems Lab,Empa,D&#x00FC;bendorf,Switzerland","institution_ids":["https://openalex.org/I71824836"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085117832","display_name":"Colin N. Jones","orcid":"https://orcid.org/0000-0001-7239-4799"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Colin N. Jones","raw_affiliation_strings":["Laboratoire d&#x0027;Automatique,EPFL,Lausanne,Switzerland"],"affiliations":[{"raw_affiliation_string":"Laboratoire d&#x0027;Automatique,EPFL,Lausanne,Switzerland","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5056840318"],"corresponding_institution_ids":["https://openalex.org/I71824836"],"apc_list":null,"apc_paid":null,"fwci":0.1758,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60382583,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"2334","last_page":"2339"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9598000049591064,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9569000005722046,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8860486149787903},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7781602144241333},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.7079768180847168},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.5178768634796143},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4896131157875061},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.44165655970573425},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.434007465839386},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4201093018054962},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38174521923065186}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8860486149787903},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7781602144241333},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.7079768180847168},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.5178768634796143},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4896131157875061},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.44165655970573425},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.434007465839386},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4201093018054962},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38174521923065186},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/cdc49753.2023.10383956","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc49753.2023.10383956","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 62nd IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},{"id":"pmh:oai:dora:empa_37152","is_oa":false,"landing_page_url":"https://www.dora.lib4ri.ch/empa/islandora/object/empa%3A37152","pdf_url":null,"source":{"id":"https://openalex.org/S4306401298","display_name":"DORA Empa (Swiss Federal Laboratories for Materials Science and Technology (Empa))","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I71824836","host_organization_name":"Swiss Federal Laboratories for Materials Science and Technology","host_organization_lineage":["https://openalex.org/I71824836"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Proceedings Paper"},{"id":"pmh:oai:infoscience.epfl.ch:307261","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/203102","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"conference paper"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:307261","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/203102","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"conference paper"},"sustainable_development_goals":[{"score":0.7099999785423279,"display_name":"No poverty","id":"https://metadata.un.org/sdg/1"}],"awards":[{"id":"https://openalex.org/G6035327784","display_name":null,"funder_award_id":"51NF40_180545","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"}],"funders":[{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1845972764","https://openalex.org/W2539402368","https://openalex.org/W2578206533","https://openalex.org/W2784465508","https://openalex.org/W2788862220","https://openalex.org/W2963099939","https://openalex.org/W2963575966","https://openalex.org/W2973229164","https://openalex.org/W2990138404","https://openalex.org/W2994779380","https://openalex.org/W3024350433","https://openalex.org/W3034311880","https://openalex.org/W3090832565","https://openalex.org/W3128176255","https://openalex.org/W3156919398","https://openalex.org/W3161175155","https://openalex.org/W3166052165","https://openalex.org/W4224290225","https://openalex.org/W4281480521","https://openalex.org/W4283373492","https://openalex.org/W4285254953","https://openalex.org/W4287846201","https://openalex.org/W4292551110","https://openalex.org/W4297792208","https://openalex.org/W4323783439","https://openalex.org/W6639175102","https://openalex.org/W6684205842","https://openalex.org/W6737893269","https://openalex.org/W6748839928","https://openalex.org/W6785204124","https://openalex.org/W6796367035"],"related_works":["https://openalex.org/W1585007175","https://openalex.org/W2364252372","https://openalex.org/W4234066492","https://openalex.org/W2382521049","https://openalex.org/W1998063895","https://openalex.org/W4306904969","https://openalex.org/W2877093712","https://openalex.org/W2116157560","https://openalex.org/W4310614650","https://openalex.org/W4386738330"],"abstract_inverted_index":{"Model-free":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"generally":[4],"suffers":[5],"from":[6],"poor":[7],"sample":[8],"complexity,":[9],"mostly":[10],"due":[11],"to":[12,15,21,40,48,68,82,120,142,144,148],"the":[13,18,26,35,76,88,96,100,113,118,122,129],"need":[14],"exhaustively":[16],"explore":[17],"state-action":[19,77],"space":[20,78],"find":[22],"well-performing":[23,145],"policies.":[24],"On":[25,132],"other":[27],"hand,":[28],"we":[29,44,56,94],"postulate":[30],"that":[31,79],"expert":[32],"knowledge":[33],"of":[34,64,75,90,117],"system":[36],"often":[37],"allows":[38,140],"us":[39],"design":[41],"simple":[42,60],"rules":[43,71],"expect":[45],"good":[46,160],"policies":[47,146],"follow":[49],"at":[50],"all":[51],"times.":[52],"In":[53],"this":[54],"work,":[55],"hence":[57],"propose":[58],"a":[59,133],"yet":[61],"effective":[62],"modification":[63],"continuous":[65],"actor-critic":[66],"frameworks":[67],"incorporate":[69],"such":[70],"and":[72,157],"avoid":[73],"regions":[74],"are":[80],"known":[81],"be":[83],"suboptimal,":[84],"thereby":[85],"significantly":[86],"accelerating":[87],"convergence":[89],"RL":[91],"agents.":[92],"Concretely,":[93],"saturate":[95],"actions":[97],"chosen":[98],"by":[99,128],"agent":[101],"if":[102],"they":[103],"do":[104],"not":[105,126],"comply":[106],"with":[107],"our":[108],"intuition":[109],"and,":[110],"critically,":[111],"modify":[112],"gradient":[114],"update":[115],"step":[116],"policy":[119],"ensure":[121],"learning":[123],"process":[124],"is":[125],"affected":[127],"saturation":[130],"step.":[131],"room":[134],"temperature":[135],"control":[136],"case":[137],"study,":[138],"it":[139],"agents":[141,153],"converge":[143],"up":[147],"6\u20137\u00d7":[149],"faster":[150],"than":[151],"classical":[152],"without":[154],"computational":[155],"overhead":[156],"while":[158],"retaining":[159],"final":[161],"performance.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
