{"id":"https://openalex.org/W7154692704","doi":"https://doi.org/10.1007/s13218-026-00908-0","title":"Deep Reinforcement Learning for Price-Aware Building Heating Control","display_name":"Deep Reinforcement Learning for Price-Aware Building Heating Control","publication_year":2026,"publication_date":"2026-03-01","ids":{"openalex":"https://openalex.org/W7154692704","doi":"https://doi.org/10.1007/s13218-026-00908-0"},"language":"en","primary_location":{"id":"doi:10.1007/s13218-026-00908-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13218-026-00908-0","pdf_url":null,"source":{"id":"https://openalex.org/S4210234608","display_name":"KI - K\u00fcnstliche Intelligenz","issn_l":"0933-1875","issn":["0933-1875","1610-1987"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"KI - K\u00fcnstliche Intelligenz","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/s13218-026-00908-0","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013879483","display_name":"Qiong Huang","orcid":"https://orcid.org/0000-0002-1958-6094"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qiong Huang","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0002-1958-6094","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133843639","display_name":"Adrian Till Assmuth","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adrian Till Assmuth","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0009-0005-7402-0770","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004584210","display_name":"Felix Langner","orcid":"https://orcid.org/0000-0002-3473-5545"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Felix Langner","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0002-3473-5545","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122362243","display_name":"Benjamin Sch\u00e4fer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benjamin Sch\u00e4fer","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0003-1607-9748","affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133913370","display_name":"Veit Hagenmeyer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Veit Hagenmeyer","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5013879483"],"corresponding_institution_ids":[],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.64391461,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"1","first_page":"17","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10121","display_name":"Building Energy and Comfort Optimization","score":0.21220000088214874,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10121","display_name":"Building Energy and Comfort Optimization","score":0.21220000088214874,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.20430000126361847,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.05869999900460243,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4397999942302704},{"id":"https://openalex.org/keywords/control-system","display_name":"Control system","score":0.37459999322891235},{"id":"https://openalex.org/keywords/temperature-control","display_name":"Temperature control","score":0.32989999651908875},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.30630001425743103},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.30239999294281006},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.29980000853538513}],"concepts":[{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4397999942302704},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4332999885082245},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.39719998836517334},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.37459999322891235},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.3587999939918518},{"id":"https://openalex.org/C536315585","wikidata":"https://www.wikidata.org/wiki/Q7698332","display_name":"Temperature control","level":2,"score":0.32989999651908875},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.30630001425743103},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2985000014305115},{"id":"https://openalex.org/C155386361","wikidata":"https://www.wikidata.org/wiki/Q1649571","display_name":"Process control","level":3,"score":0.2849999964237213},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C107464732","wikidata":"https://www.wikidata.org/wiki/Q235781","display_name":"Adaptive control","level":3,"score":0.27379998564720154},{"id":"https://openalex.org/C39432304","wikidata":"https://www.wikidata.org/wiki/Q188847","display_name":"Environmental science","level":0,"score":0.25999999046325684},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.258899986743927}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s13218-026-00908-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13218-026-00908-0","pdf_url":null,"source":{"id":"https://openalex.org/S4210234608","display_name":"KI - K\u00fcnstliche Intelligenz","issn_l":"0933-1875","issn":["0933-1875","1610-1987"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"KI - K\u00fcnstliche Intelligenz","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s13218-026-00908-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13218-026-00908-0","pdf_url":null,"source":{"id":"https://openalex.org/S4210234608","display_name":"KI - K\u00fcnstliche Intelligenz","issn_l":"0933-1875","issn":["0933-1875","1610-1987"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"KI - K\u00fcnstliche Intelligenz","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1537738310","https://openalex.org/W2145339207","https://openalex.org/W2277948250","https://openalex.org/W2625874945","https://openalex.org/W2903646426","https://openalex.org/W3033217105","https://openalex.org/W3034748593","https://openalex.org/W3193645253","https://openalex.org/W4306253561","https://openalex.org/W4315480006","https://openalex.org/W4319345905","https://openalex.org/W4378571872","https://openalex.org/W4386337718","https://openalex.org/W4389265746","https://openalex.org/W4391265349","https://openalex.org/W4399762674","https://openalex.org/W7124166191"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"Heating":[1],"systems":[2],"account":[3],"for":[4,17,190,199],"a":[5,59,99,134,141],"significant":[6],"share":[7],"of":[8,49,105,181,188],"residential":[9],"energy":[10,14],"consumption,":[11],"and":[12,73,81,94,140,152,175,192],"rising":[13],"prices":[15],"call":[16],"intelligent,":[18],"cost-aware":[19],"control":[20,30,55,195],"strategies.":[21,118],"Traditional":[22],"methods,":[23],"such":[24],"as":[25],"rule-based":[26],"or":[27,37,126],"model":[28,100],"predictive":[29],"(MPC),":[31],"often":[32],"require":[33],"detailed":[34,200],"system":[35],"modeling":[36],"lack":[38],"adaptability":[39],"to":[40,54,114],"dynamic":[41],"price":[42,96,110,122],"signals.":[43],"This":[44],"work":[45],"explores":[46],"the":[47,102,106,131,146,169,177,186,197],"use":[48],"deep":[50,70],"reinforcement":[51],"learning":[52],"(DRL)":[53],"heat":[56],"pumps":[57],"in":[58,89,158],"way":[60],"that":[61],"balances":[62],"occupant":[63],"comfort":[64,128,143,151],"with":[65,98,124,137],"energy-cost":[66],"minimization.":[67],"We":[68],"evaluate":[69],"Q-network":[71],"(DQN)":[72],"proximal":[74],"policy":[75,171],"optimization":[76],"(PPO)":[77],"methods":[78],"across":[79],"discrete":[80,138],"continuous":[82],"action":[83],"spaces.":[84],"The":[85,183],"agents":[86],"are":[87,112],"trained":[88],"simulation":[90],"using":[91],"real":[92],"weather":[93],"electricity":[95,109],"data,":[97],"representing":[101],"thermal":[103],"dynamics":[104],"building.":[107],"Short-term":[108],"forecasts":[111],"included":[113],"enable":[115],"anticipatory":[116],"heating":[117,194],"Reward":[119],"functions":[120],"combine":[121],"penalties":[123],"piecewise-linear":[125,142],"quadratic":[127],"penalties.":[129],"Among":[130],"DRL":[132,189],"variants,":[133],"DQN":[135,170],"agent":[136],"actions":[139],"reward":[144],"achieves":[145],"best":[147,157],"overall":[148],"trade-off":[149],"between":[150],"cost.":[153],"MPC":[154,173],"still":[155],"performs":[156],"absolute":[159],"cost":[160],"terms":[161],"because":[162],"it":[163],"uses":[164],"an":[165],"exact":[166],"model,":[167],"while":[168],"approaches":[172],"performance":[174],"retains":[176],"model-free,":[178],"adaptive":[179,191],"advantages":[180],"RL.":[182],"findings":[184],"highlight":[185],"potential":[187],"price-aware":[193],"without":[196],"need":[198],"physical":[201],"modeling.":[202]},"counts_by_year":[],"updated_date":"2026-05-23T06:10:36.450269","created_date":"2026-04-18T00:00:00"}
