{"id":"https://openalex.org/W4403923016","doi":"https://doi.org/10.1145/3671127.3698163","title":"Adaptive Policy Regularization for Offline-to-Online Reinforcement Learning in HVAC Control","display_name":"Adaptive Policy Regularization for Offline-to-Online Reinforcement Learning in HVAC Control","publication_year":2024,"publication_date":"2024-10-29","ids":{"openalex":"https://openalex.org/W4403923016","doi":"https://doi.org/10.1145/3671127.3698163"},"language":"en","primary_location":{"id":"doi:10.1145/3671127.3698163","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3671127.3698163","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 11th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101600786","display_name":"Hsin\u2010Yu Liu","orcid":"https://orcid.org/0000-0002-9316-2150"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hsin-Yu Liu","raw_affiliation_strings":["University of California, San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101689740","display_name":"Bharathan Balaji","orcid":"https://orcid.org/0000-0002-9490-2018"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bharathan Balaji","raw_affiliation_strings":["Amazon, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078959213","display_name":"Rajesh K. Gupta","orcid":"https://orcid.org/0000-0002-6489-7633"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajesh Gupta","raw_affiliation_strings":["University of California, San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088730125","display_name":"Dezhi Hong","orcid":"https://orcid.org/0000-0001-5224-6043"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dezhi Hong","raw_affiliation_strings":["Amazon, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, USA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101600786"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":0.2962,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.58595005,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10121","display_name":"Building Energy and Comfort Optimization","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10121","display_name":"Building Energy and Comfort Optimization","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8429933786392212},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.7100639343261719},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6638721823692322},{"id":"https://openalex.org/keywords/hvac","display_name":"HVAC","score":0.6541897654533386},{"id":"https://openalex.org/keywords/adaptive-control","display_name":"Adaptive control","score":0.4964368939399719},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.44495901465415955},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3943358063697815},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.3650679886341095},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33331841230392456},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.15953877568244934},{"id":"https://openalex.org/keywords/air-conditioning","display_name":"Air conditioning","score":0.07582929730415344}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8429933786392212},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.7100639343261719},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6638721823692322},{"id":"https://openalex.org/C122346748","wikidata":"https://www.wikidata.org/wiki/Q1798773","display_name":"HVAC","level":3,"score":0.6541897654533386},{"id":"https://openalex.org/C107464732","wikidata":"https://www.wikidata.org/wiki/Q235781","display_name":"Adaptive control","level":3,"score":0.4964368939399719},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.44495901465415955},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3943358063697815},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3650679886341095},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33331841230392456},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15953877568244934},{"id":"https://openalex.org/C103742991","wikidata":"https://www.wikidata.org/wiki/Q173725","display_name":"Air conditioning","level":2,"score":0.07582929730415344},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3671127.3698163","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3671127.3698163","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 11th ACM International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W106792269","https://openalex.org/W1839500782","https://openalex.org/W2121863487","https://openalex.org/W2788862220","https://openalex.org/W2957897705","https://openalex.org/W2966012238","https://openalex.org/W2982984621","https://openalex.org/W3024350433","https://openalex.org/W3033324992","https://openalex.org/W3216341996","https://openalex.org/W4200503332","https://openalex.org/W4226494903","https://openalex.org/W4283397781","https://openalex.org/W4310984691"],"related_works":["https://openalex.org/W2112866972","https://openalex.org/W4240233711","https://openalex.org/W2900606913","https://openalex.org/W4320003279","https://openalex.org/W2326910963","https://openalex.org/W3111008797","https://openalex.org/W4287552621","https://openalex.org/W4376649626","https://openalex.org/W593427938","https://openalex.org/W2166790877"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,38],"(RL)-based":[2],"control":[3],"methods":[4],"have":[5],"been":[6],"extensively":[7],"studied":[8],"to":[9,86,94,120],"improve":[10],"building":[11],"heating,":[12],"ventilation,":[13],"and":[14,24,152],"air":[15],"conditioning":[16],"(HVAC)":[17],"efficiency.":[18],"Data-driven":[19],"approaches":[20,144],"demonstrate":[21,139],"better":[22],"transferability":[23],"scalability,":[25],"making":[26],"them":[27],"useful":[28],"in":[29,54],"real-world":[30],"applications.":[31],"Most":[32],"prior":[33],"works":[34,60],"focus":[35],"on":[36,58,65,105],"online":[37,92,101],"requiring":[39],"simulators":[40,49],"or":[41],"models":[42,90],"of":[43,131],"environment":[44],"dynamics.":[45],"However,":[46],"transferring":[47],"thermal":[48],"between":[50],"environments":[51],"is":[52,74],"inefficient":[53],"practice.":[55],"We":[56,97],"build":[57],"recent":[59],"that":[61,99],"employ":[62],"offline":[63,72,89,106],"training":[64,119],"static":[66],"datasets":[67],"from":[68],"unknown":[69],"policies.":[70,107],"Pure":[71],"RL":[73,85],"constrained":[75],"by":[76,150],"the":[77,115,122],"replay":[78],"buffer's":[79],"distribution,":[80],"we":[81,111,126],"propose":[82,112],"using":[83],"offline-to-online":[84],"enhance":[87],"pre-trained":[88,154],"through":[91],"adaptation":[93],"distribution":[95],"shifts.":[96],"show":[98],"direct":[100],"fine-tuning":[102],"deteriorates":[103],"performance":[104,149],"To":[108],"address":[109],"this,":[110],"automatically":[113],"tuning":[114],"actor's":[116],"regularization":[117],"during":[118],"optimize":[121],"exploration-exploitation":[123],"tradeoff.":[124],"Specifically,":[125],"leverage":[127],"simple":[128],"moving":[129],"averages":[130],"mean":[132],"Q-values":[133],"sampled":[134],"throughout":[135],"training.":[136],"Simulation":[137],"experiments":[138],"our":[140],"method":[141],"outperforms":[142],"state-of-the-art":[143],"under":[145],"various":[146],"conditions,":[147],"improving":[148],"32.9%":[151],"enhancing":[153],"models'":[155],"capabilities":[156],"online.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
