{"id":"https://openalex.org/W4380887568","doi":"https://doi.org/10.1145/3575813.3595202","title":"Rule-based Policy Regularization for Reinforcement Learning-based Building Control","display_name":"Rule-based Policy Regularization for Reinforcement Learning-based Building Control","publication_year":2023,"publication_date":"2023-06-16","ids":{"openalex":"https://openalex.org/W4380887568","doi":"https://doi.org/10.1145/3575813.3595202"},"language":"en","primary_location":{"id":"doi:10.1145/3575813.3595202","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3575813.3595202","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3575813.3595202","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Future Energy Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3575813.3595202","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101600786","display_name":"Hsin\u2010Yu Liu","orcid":"https://orcid.org/0000-0002-9316-2150"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hsin-Yu Liu","raw_affiliation_strings":["Electrical and Computer Engineering, University of California San Diego, USA"],"raw_orcid":"https://orcid.org/0000-0002-9316-2150","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of California San Diego, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101689740","display_name":"Bharathan Balaji","orcid":"https://orcid.org/0000-0002-9490-2018"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bharathan Balaji","raw_affiliation_strings":["Amazon, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-9490-2018","affiliations":[{"raw_affiliation_string":"Amazon, United States of America","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078959213","display_name":"Rajesh K. Gupta","orcid":"https://orcid.org/0000-0002-6489-7633"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajesh Gupta","raw_affiliation_strings":["University of California San Diego, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-6489-7633","affiliations":[{"raw_affiliation_string":"University of California San Diego, United States of America","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088730125","display_name":"Dezhi Hong","orcid":"https://orcid.org/0000-0001-5224-6043"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dezhi Hong","raw_affiliation_strings":["Amazon, United States of America"],"raw_orcid":"https://orcid.org/0000-0001-5224-6043","affiliations":[{"raw_affiliation_string":"Amazon, United States of America","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4411,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.79297037,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"242","last_page":"265"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10121","display_name":"Building Energy and Comfort Optimization","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10121","display_name":"Building Energy and Comfort Optimization","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9587000012397766,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8226840496063232},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7360219955444336},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5030362010002136},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.478464812040329},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.471858948469162},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44205474853515625},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.42593422532081604},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.4187811613082886},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3791053891181946},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10872572660446167}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8226840496063232},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7360219955444336},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5030362010002136},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.478464812040329},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.471858948469162},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44205474853515625},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.42593422532081604},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.4187811613082886},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3791053891181946},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10872572660446167},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3575813.3595202","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3575813.3595202","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3575813.3595202","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Future Energy Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:escholarship.org:ark:/13030/qt88g8p97s","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/88g8p97s","pdf_url":"https://escholarship.org/content/qt88g8p97s/qt88g8p97s.pdf?t=rwnfrn","source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1145/3575813.3595202","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3575813.3595202","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3575813.3595202","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Future Energy Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.9100000262260437,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4380887568.pdf","grobid_xml":"https://content.openalex.org/works/W4380887568.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1839500782","https://openalex.org/W1980056622","https://openalex.org/W2016289761","https://openalex.org/W2158782408","https://openalex.org/W2625874945","https://openalex.org/W2754517384","https://openalex.org/W2957897705","https://openalex.org/W2963575966","https://openalex.org/W2966012238","https://openalex.org/W2970971581","https://openalex.org/W2977843878","https://openalex.org/W3024350433","https://openalex.org/W3033324992","https://openalex.org/W3037429136","https://openalex.org/W3097499542","https://openalex.org/W3099050578","https://openalex.org/W3129082882","https://openalex.org/W3154227779","https://openalex.org/W3216341996","https://openalex.org/W4200503332","https://openalex.org/W4226494903","https://openalex.org/W4283397781","https://openalex.org/W6780995382"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2389214306","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W2024136090","https://openalex.org/W2000428801"],"abstract_inverted_index":{"Rule-based":[0],"control":[1,83,160],"(RBC)":[2],"is":[3,26,57],"widely":[4],"adopted":[5],"in":[6,46,59,97,147,166,176,191],"buildings":[7],"due":[8,62],"to":[9,30,38,42,63,77,85,89,125,133,181],"its":[10],"stability":[11],"and":[12,94,100,108,113,150,170,188],"robustness.":[13],"It":[14],"resembles":[15],"a":[16,81,92,119,182],"behavior":[17],"cloning":[18],"methodology":[19],"refined":[20],"by":[21,67,169,175],"human":[22],"experts;":[23],"however,":[24],"it":[25],"incapable":[27],"of":[28,185],"adapting":[29],"distribution":[31],"drifts.":[32],"Reinforcement":[33],"learning":[34,55],"(RL)":[35],"can":[36],"adapt":[37],"changes":[39],"but":[40],"needs":[41],"learn":[43,91,134],"from":[44,135],"scratch":[45],"the":[47,51,54,172],"online":[48,99,107,177],"setting.":[49],"On":[50],"other":[52],"hand,":[53],"ability":[56],"limited":[58],"offline":[60,101,109,167],"settings":[61,168,178],"extrapolation":[64],"errors":[65],"caused":[66],"selecting":[68],"out-of-distribution":[69],"actions.":[70],"In":[71],"this":[72],"paper,":[73],"we":[74,117,153],"explore":[75],"how":[76],"incorporate":[78],"RL":[79,110,131],"with":[80,105,179],"rule-based":[82,158],"policy":[84,96,129],"combine":[86],"their":[87],"strengths":[88],"continuously":[90],"scalable":[93],"robust":[95],"both":[98,148],"settings.":[102],"We":[103],"start":[104],"representative":[106],"methods,":[111],"TD3":[112],"TD3+BC,":[114],"respectively.":[115],"Then,":[116],"develop":[118],"dynamically":[120],"weighted":[121],"actor":[122],"loss":[123],"function":[124],"selectively":[126],"choose":[127],"which":[128],"for":[130],"models":[132],"at":[136],"each":[137],"training":[138],"iteration.":[139],"With":[140],"extensive":[141],"experiments":[142],"across":[143],"various":[144],"weather":[145],"conditions":[146],"deterministic":[149],"stochastic":[151],"scenarios,":[152],"demonstrate":[154],"that":[155],"our":[156],"algorithm,":[157],"incorporated":[159],"regularization":[161],"(RUBICON),":[162],"outperforms":[163],"state-of-the-art":[164],"methods":[165],"improves":[171],"baseline":[173],"method":[174],"respect":[180],"reward":[183],"consisting":[184],"thermal":[186],"comfort":[187],"energy":[189],"consumption":[190],"building-RL":[192],"environments.":[193]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
