{"id":"https://openalex.org/W4415593494","doi":"https://doi.org/10.1109/tcst.2025.3620521","title":"AC4MPC: Actor-Critic Reinforcement Learning for Guiding Model Predictive Control","display_name":"AC4MPC: Actor-Critic Reinforcement Learning for Guiding Model Predictive Control","publication_year":2025,"publication_date":"2025-10-27","ids":{"openalex":"https://openalex.org/W4415593494","doi":"https://doi.org/10.1109/tcst.2025.3620521"},"language":"en","primary_location":{"id":"doi:10.1109/tcst.2025.3620521","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcst.2025.3620521","pdf_url":null,"source":{"id":"https://openalex.org/S133363738","display_name":"IEEE Transactions on Control Systems Technology","issn_l":"1063-6536","issn":["1063-6536","1558-0865","2374-0159"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Control Systems Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017656696","display_name":"Rudolf Reiter","orcid":"https://orcid.org/0009-0007-7635-2132"},"institutions":[{"id":"https://openalex.org/I237525767","display_name":"Zurich Insurance Group (Switzerland)","ror":"https://ror.org/03cw8qd03","country_code":"CH","type":"company","lineage":["https://openalex.org/I237525767"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Rudolf Reiter","raw_affiliation_strings":["Department of Informatics, Robotics and Perception Group, University of Zurich, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0009-0007-7635-2132","affiliations":[{"raw_affiliation_string":"Department of Informatics, Robotics and Perception Group, University of Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I237525767"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040811425","display_name":"Andrea Ghezzi","orcid":"https://orcid.org/0000-0002-1420-4560"},"institutions":[{"id":"https://openalex.org/I161046081","display_name":"University of Freiburg","ror":"https://ror.org/0245cg223","country_code":"DE","type":"education","lineage":["https://openalex.org/I161046081"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Andrea Ghezzi","raw_affiliation_strings":["Department of Microsystems Engineering (IMTEK), University of Freiburg, Freiburg, Germany"],"raw_orcid":"https://orcid.org/0000-0002-1420-4560","affiliations":[{"raw_affiliation_string":"Department of Microsystems Engineering (IMTEK), University of Freiburg, Freiburg, Germany","institution_ids":["https://openalex.org/I161046081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012277347","display_name":"Katrin Baumg\u00e4rtner","orcid":"https://orcid.org/0009-0004-0485-8386"},"institutions":[{"id":"https://openalex.org/I161046081","display_name":"University of Freiburg","ror":"https://ror.org/0245cg223","country_code":"DE","type":"education","lineage":["https://openalex.org/I161046081"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Katrin Baumg\u00e4rtner","raw_affiliation_strings":["Department of Microsystems Engineering (IMTEK), University of Freiburg, Freiburg, Germany"],"raw_orcid":"https://orcid.org/0009-0004-0485-8386","affiliations":[{"raw_affiliation_string":"Department of Microsystems Engineering (IMTEK), University of Freiburg, Freiburg, Germany","institution_ids":["https://openalex.org/I161046081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000812504","display_name":"Jasper Hoffmann","orcid":"https://orcid.org/0000-0002-0724-0790"},"institutions":[{"id":"https://openalex.org/I161046081","display_name":"University of Freiburg","ror":"https://ror.org/0245cg223","country_code":"DE","type":"education","lineage":["https://openalex.org/I161046081"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jasper Hoffmann","raw_affiliation_strings":["Department of Computer Science, University of Freiburg, Freiburg, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Freiburg, Freiburg, Germany","institution_ids":["https://openalex.org/I161046081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009355093","display_name":"Robert D. McAllister","orcid":"https://orcid.org/0000-0002-5687-6875"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Robert D. McAllister","raw_affiliation_strings":["Delft Center for Systems and Control, Delft University of Technology, Delft, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-5687-6875","affiliations":[{"raw_affiliation_string":"Delft Center for Systems and Control, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057375078","display_name":"Moritz Diehl","orcid":"https://orcid.org/0000-0001-6556-8252"},"institutions":[{"id":"https://openalex.org/I161046081","display_name":"University of Freiburg","ror":"https://ror.org/0245cg223","country_code":"DE","type":"education","lineage":["https://openalex.org/I161046081"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Moritz Diehl","raw_affiliation_strings":["Department of Microsystems Engineering and the Department of Mathematics, University of Freiburg, Freiburg, Germany"],"raw_orcid":"https://orcid.org/0000-0001-6556-8252","affiliations":[{"raw_affiliation_string":"Department of Microsystems Engineering and the Department of Mathematics, University of Freiburg, Freiburg, Germany","institution_ids":["https://openalex.org/I161046081"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.6355,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.90891444,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"34","issue":"1","first_page":"395","last_page":"410"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.921999990940094,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/model-predictive-control","display_name":"Model predictive control","score":0.7537999749183655},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7462000250816345},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.6926000118255615},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5430999994277954},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.5424000024795532},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.47130000591278076},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.4510999917984009},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.4375}],"concepts":[{"id":"https://openalex.org/C172205157","wikidata":"https://www.wikidata.org/wiki/Q1782962","display_name":"Model predictive control","level":3,"score":0.7537999749183655},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7462000250816345},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.6926000118255615},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6517999768257141},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5430999994277954},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.5424000024795532},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.47130000591278076},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.4510999917984009},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.4375},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.4311999976634979},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4185999929904938},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.4011000096797943},{"id":"https://openalex.org/C2778448659","wikidata":"https://www.wikidata.org/wiki/Q1931051","display_name":"Overtaking","level":2,"score":0.3935999870300293},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32190001010894775},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3084999918937683},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.28450000286102295},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C91581856","wikidata":"https://www.wikidata.org/wiki/Q2707001","display_name":"Nonlinear control","level":3,"score":0.25999999046325684},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcst.2025.3620521","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcst.2025.3620521","pdf_url":null,"source":{"id":"https://openalex.org/S133363738","display_name":"IEEE Transactions on Control Systems Technology","issn_l":"1063-6536","issn":["1063-6536","1558-0865","2374-0159"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Control Systems Technology","raw_type":"journal-article"},{"id":"pmh:oai:freidok.uni-freiburg.de:275255","is_oa":false,"landing_page_url":"https://freidok.uni-freiburg.de/data/275255","pdf_url":null,"source":{"id":"https://openalex.org/S4306401057","display_name":"FreiDok plus (Universit\u00e4tsbibliothek Freiburg)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I161046081","host_organization_name":"University of Freiburg","host_organization_lineage":["https://openalex.org/I161046081"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE transactions on control systems technology. - 34, 1 (2025) , 395-410, ISSN: 1558-0865","raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1562460111","https://openalex.org/W1941445455","https://openalex.org/W1967821692","https://openalex.org/W1971804534","https://openalex.org/W2004975435","https://openalex.org/W2026216293","https://openalex.org/W2027968610","https://openalex.org/W2049670620","https://openalex.org/W2090395694","https://openalex.org/W2123871098","https://openalex.org/W2141249007","https://openalex.org/W2150884987","https://openalex.org/W2162878264","https://openalex.org/W2165545652","https://openalex.org/W2166483538","https://openalex.org/W2178380636","https://openalex.org/W2515477780","https://openalex.org/W2525738796","https://openalex.org/W2617586537","https://openalex.org/W2765650568","https://openalex.org/W2887155241","https://openalex.org/W2893078623","https://openalex.org/W2904246096","https://openalex.org/W2930426397","https://openalex.org/W2967084668","https://openalex.org/W2968192014","https://openalex.org/W3010920281","https://openalex.org/W3041042960","https://openalex.org/W3155272911","https://openalex.org/W3186347355","https://openalex.org/W3189223768","https://openalex.org/W3207305612","https://openalex.org/W3208705304","https://openalex.org/W4206105733","https://openalex.org/W4237591687","https://openalex.org/W4293775970","https://openalex.org/W4300309110","https://openalex.org/W4314946881","https://openalex.org/W4321380998","https://openalex.org/W4365790399","https://openalex.org/W4385301320","https://openalex.org/W4385452022","https://openalex.org/W4388918651","https://openalex.org/W4390422151","https://openalex.org/W4396869641"],"related_works":[],"abstract_inverted_index":{"Nonlinear":[0],"model":[1],"predictive":[2],"control":[3,12,63,93],"(MPC)":[4],"and":[5,46,149,163,206],"reinforcement":[6],"learning":[7],"(RL)":[8],"are":[9,99,195],"two":[10],"powerful":[11],"strategies":[13],"with":[14],"complementary":[15],"advantages.":[16],"This":[17],"work":[18],"shows":[19],"how":[20],"actor-critic":[21],"RL":[22,34,134,147],"techniques":[23],"can":[24],"be":[25],"leveraged":[26],"to":[27,101,187],"improve":[28],"the":[29,42,55,59,79,87,96,102,114,120,127,132,143,146,150,153,169,174],"performance":[30],"of":[31,41,58,113,131,145,152,171],"MPC.":[32,60],"The":[33,92,177,197],"critic":[35,175],"is":[36,65,71,90,124,180,199],"used":[37],"as":[38],"an":[39,47,51,137,202,207],"approximation":[40],"optimal":[43,161],"value":[44],"function,":[45],"actor":[48,80,135,148],"rollout":[49,81],"provides":[50],"initial":[52,76],"guess":[53],"for":[54,74,182,193],"primal":[56],"variables":[57],"A":[61],"parallel":[62],"architecture":[64],"proposed":[66,115,178],"where":[67,184],"each":[68,105],"MPC":[69,194],"instance":[70],"solved":[72],"twice":[73],"different":[75],"guesses.":[77],"Besides":[78],"initialization,":[82],"a":[83],"shifted":[84],"initialization":[85],"from":[86,95],"previous":[88],"solution":[89],"used.":[91],"actions":[94],"lowest-cost":[97],"trajectory":[98],"applied":[100],"system":[103],"at":[104],"time":[106],"step.":[107],"We":[108],"provide":[109],"some":[110],"theoretical":[111],"justification":[112],"algorithm":[116,179],"by":[117,126],"establishing":[118],"that":[119,140,165],"discounted":[121,128],"closed-loop":[122,129],"cost":[123,130],"upper-bounded":[125],"original":[133],"plus":[136],"error":[138],"term":[139],"depends":[141],"on":[142],"(sub)optimality":[144],"accuracy":[151],"critic.":[154],"These":[155],"results":[156],"do":[157],"not":[158],"require":[159],"globally":[160],"solutions":[162],"indicate":[164],"larger":[166],"horizons":[167],"mitigate":[168],"effect":[170],"errors":[172],"in":[173,201],"approximation.":[176],"intended":[181],"applications":[183],"standard":[185],"methods":[186],"construct":[188],"terminal":[189],"costs":[190],"or":[191],"constraints":[192],"impractical.":[196],"approach":[198],"demonstrated":[200],"illustrative":[203],"toy":[204],"example":[205],"autonomous":[208],"driving":[209],"overtaking":[210],"scenario.":[211]},"counts_by_year":[{"year":2026,"cited_by_count":3}],"updated_date":"2026-06-17T08:01:34.144755","created_date":"2025-10-28T00:00:00"}
