{"id":"https://openalex.org/W4388561984","doi":"https://doi.org/10.1109/icstcc59206.2023.10308475","title":"Learning-Based Model Predictive Control Using Double Q-Learning","display_name":"Learning-Based Model Predictive Control Using Double Q-Learning","publication_year":2023,"publication_date":"2023-10-11","ids":{"openalex":"https://openalex.org/W4388561984","doi":"https://doi.org/10.1109/icstcc59206.2023.10308475"},"language":"en","primary_location":{"id":"doi:10.1109/icstcc59206.2023.10308475","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icstcc59206.2023.10308475","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 27th International Conference on System Theory, Control and Computing (ICSTCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ieeexplore.ieee.org/document/10308475","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015141503","display_name":"Hoomaan MoradiMaryamnegari","orcid":"https://orcid.org/0000-0002-0858-0420"},"institutions":[{"id":"https://openalex.org/I171543936","display_name":"Free University of Bozen-Bolzano","ror":"https://ror.org/012ajp527","country_code":"IT","type":"education","lineage":["https://openalex.org/I171543936"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Hoomaan MoradiMaryamnegari","raw_affiliation_strings":["Free University of Bozen-Bolzano,Faculty of Engineering,Bolzano,Italy","Faculty of Engineering, Free University of Bozen-Bolzano, Bolzano, Italy"],"affiliations":[{"raw_affiliation_string":"Free University of Bozen-Bolzano,Faculty of Engineering,Bolzano,Italy","institution_ids":["https://openalex.org/I171543936"]},{"raw_affiliation_string":"Faculty of Engineering, Free University of Bozen-Bolzano, Bolzano, Italy","institution_ids":["https://openalex.org/I171543936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080554397","display_name":"Marco Frego","orcid":null},"institutions":[{"id":"https://openalex.org/I171543936","display_name":"Free University of Bozen-Bolzano","ror":"https://ror.org/012ajp527","country_code":"IT","type":"education","lineage":["https://openalex.org/I171543936"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco Frego","raw_affiliation_strings":["Free University of Bozen-Bolzano,Faculty of Engineering,Bolzano,Italy","Faculty of Engineering, Free University of Bozen-Bolzano, Bolzano, Italy"],"affiliations":[{"raw_affiliation_string":"Free University of Bozen-Bolzano,Faculty of Engineering,Bolzano,Italy","institution_ids":["https://openalex.org/I171543936"]},{"raw_affiliation_string":"Faculty of Engineering, Free University of Bozen-Bolzano, Bolzano, Italy","institution_ids":["https://openalex.org/I171543936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083668298","display_name":"Angelika Peer","orcid":"https://orcid.org/0000-0002-2896-9011"},"institutions":[{"id":"https://openalex.org/I171543936","display_name":"Free University of Bozen-Bolzano","ror":"https://ror.org/012ajp527","country_code":"IT","type":"education","lineage":["https://openalex.org/I171543936"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Angelika Peer","raw_affiliation_strings":["Free University of Bozen-Bolzano,Faculty of Engineering,Bolzano,Italy","Faculty of Engineering, Free University of Bozen-Bolzano, Bolzano, Italy"],"affiliations":[{"raw_affiliation_string":"Free University of Bozen-Bolzano,Faculty of Engineering,Bolzano,Italy","institution_ids":["https://openalex.org/I171543936"]},{"raw_affiliation_string":"Faculty of Engineering, Free University of Bozen-Bolzano, Bolzano, Italy","institution_ids":["https://openalex.org/I171543936"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5015141503"],"corresponding_institution_ids":["https://openalex.org/I171543936"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17641859,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"349","last_page":"355"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7805989980697632},{"id":"https://openalex.org/keywords/model-predictive-control","display_name":"Model predictive control","score":0.7326667308807373},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.6181818246841431},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5767098069190979},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5514097809791565},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.5395163297653198},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.5249007940292358},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4872364103794098},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.370466947555542},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3688538372516632},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.32933148741722107},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.32866811752319336},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.29791757464408875}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7805989980697632},{"id":"https://openalex.org/C172205157","wikidata":"https://www.wikidata.org/wiki/Q1782962","display_name":"Model predictive control","level":3,"score":0.7326667308807373},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.6181818246841431},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5767098069190979},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5514097809791565},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.5395163297653198},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.5249007940292358},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4872364103794098},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.370466947555542},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3688538372516632},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32933148741722107},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.32866811752319336},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29791757464408875},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icstcc59206.2023.10308475","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icstcc59206.2023.10308475","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 27th International Conference on System Theory, Control and Computing (ICSTCC)","raw_type":"proceedings-article"},{"id":"pmh:oai:unibz.it:11315707070001241","is_oa":true,"landing_page_url":"https://ieeexplore.ieee.org/document/10308475","pdf_url":null,"source":{"id":"https://openalex.org/S4210197018","display_name":"View","issn_l":"2688-268X","issn":["2688-268X","2688-3988"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:unibz.it:11315707070001241","is_oa":true,"landing_page_url":"https://ieeexplore.ieee.org/document/10308475","pdf_url":null,"source":{"id":"https://openalex.org/S4210197018","display_name":"View","issn_l":"2688-268X","issn":["2688-268X","2688-3988"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2155968351","https://openalex.org/W2600756564","https://openalex.org/W2930426397","https://openalex.org/W2948652605","https://openalex.org/W2976657239","https://openalex.org/W3136876223","https://openalex.org/W3138294267","https://openalex.org/W3156316153","https://openalex.org/W3185528958","https://openalex.org/W3186347355","https://openalex.org/W4200080422","https://openalex.org/W4210977611","https://openalex.org/W4212979410","https://openalex.org/W4214717370","https://openalex.org/W4225603049","https://openalex.org/W4226319110","https://openalex.org/W4285379000","https://openalex.org/W4287017924","https://openalex.org/W4289822971","https://openalex.org/W4312445281","https://openalex.org/W4387914188","https://openalex.org/W4401880584","https://openalex.org/W6677067356","https://openalex.org/W6775536256","https://openalex.org/W6840297893"],"related_works":["https://openalex.org/W1990079087","https://openalex.org/W2101188133","https://openalex.org/W4306904969","https://openalex.org/W3202234113","https://openalex.org/W4248731570","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2381210024"],"abstract_inverted_index":{"In":[0,27],"this":[1,28],"work,":[2],"we":[3,87],"present":[4],"a":[5,10,19,136],"new":[6],"method":[7],"to":[8,103,109,128,148,166],"tune":[9],"Model":[11],"Predictive":[12],"Controller":[13],"(MPC)":[14],"with":[15,33],"the":[16,42,48,68,72,85,90,93,104,122,144,154,160],"help":[17],"of":[18,36,50,57,71,82,92],"Reinforcement":[20],"Learning":[21],"(RL)":[22],"algorithm":[23,95],"called":[24],"Double":[25,73],"Q-Learning.":[26],"algorithm,":[29],"two":[30,80],"function":[31,53,70],"approximators":[32],"different":[34],"sets":[35,81],"parameters":[37,56,83],"are":[38,101],"trained":[39],"simultaneously.":[40],"First,":[41],"nonlinear":[43],"MPC":[44,123],"is":[45,65,115,125],"parametrized":[46],"in":[47,84,121],"weights":[49],"its":[51,58],"cost":[52],"and":[54,60],"unknown":[55],"equality":[59],"inequality":[61],"constraints.":[62],"Then,":[63],"it":[64],"defined":[66],"as":[67],"action-value":[69],"Q-Learning":[74,131,156],"algorithm.":[75,132],"By":[76],"randomly":[77],"switching":[78],"between":[79],"MPC,":[86],"show":[88,140],"that":[89,141],"exploration":[91],"proposed":[94],"increases.":[96],"Since":[97],"model":[98,113],"error":[99],"terms":[100],"added":[102],"baseline":[105],"stage":[106],"cost,":[107],"thanks":[108],"more":[110,168],"exploration,":[111],"less":[112,119],"mismatch":[114],"obtained.":[116],"With":[117],"this,":[118],"bias":[120],"controller":[124],"achieved":[126],"compared":[127],"an":[129],"MPC-based":[130,155],"Simulation":[133],"results":[134],"on":[135],"coupled":[137],"tanks":[138],"system":[139],"not":[142],"only":[143],"training":[145],"process":[146],"resulted":[147],"be":[149,167],"faster":[150],"than":[151],"observed":[152],"for":[153],"method,":[157],"but":[158],"also":[159],"final":[161],"control":[162],"performance":[163],"was":[164],"found":[165],"desirable.":[169]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
