{"id":"https://openalex.org/W4385451973","doi":"https://doi.org/10.23919/ecc57647.2023.10178119","title":"A Painless Deterministic Policy Gradient Method for Learning-based MPC","display_name":"A Painless Deterministic Policy Gradient Method for Learning-based MPC","publication_year":2023,"publication_date":"2023-06-13","ids":{"openalex":"https://openalex.org/W4385451973","doi":"https://doi.org/10.23919/ecc57647.2023.10178119"},"language":"en","primary_location":{"id":"doi:10.23919/ecc57647.2023.10178119","is_oa":false,"landing_page_url":"http://dx.doi.org/10.23919/ecc57647.2023.10178119","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 European Control Conference (ECC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046127916","display_name":"Akhil S Anand","orcid":"https://orcid.org/0000-0002-3121-578X"},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":true,"raw_author_name":"Akhil S Anand","raw_affiliation_strings":["Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","institution_ids":["https://openalex.org/I204778367"]},{"raw_affiliation_string":"Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway","institution_ids":["https://openalex.org/I204778367"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076502958","display_name":"Dirk Reinhardt","orcid":"https://orcid.org/0000-0002-7027-4483"},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Dirk Reinhardt","raw_affiliation_strings":["Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","institution_ids":["https://openalex.org/I204778367"]},{"raw_affiliation_string":"Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway","institution_ids":["https://openalex.org/I204778367"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012590059","display_name":"Shambhuraj Sawant","orcid":null},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Shambhuraj Sawant","raw_affiliation_strings":["Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","institution_ids":["https://openalex.org/I204778367"]},{"raw_affiliation_string":"Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway","institution_ids":["https://openalex.org/I204778367"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034485872","display_name":"Jan Tommy Gravdahl","orcid":"https://orcid.org/0000-0002-5663-0795"},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Jan Tommy Gravdahl","raw_affiliation_strings":["Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","institution_ids":["https://openalex.org/I204778367"]},{"raw_affiliation_string":"Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway","institution_ids":["https://openalex.org/I204778367"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049645185","display_name":"S\u00e9bastien Gros","orcid":"https://orcid.org/0000-0001-6054-2133"},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Sebastien Gros","raw_affiliation_strings":["Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway"],"affiliations":[{"raw_affiliation_string":"Norwegian University of Science and Technology (NTNU),Dept. of Engineering Cybernetics,Trondheim,Norway","institution_ids":["https://openalex.org/I204778367"]},{"raw_affiliation_string":"Dept. of Engineering Cybernetics, Norwegian University of Science and Technology (NTNU), Trondheim, Norway","institution_ids":["https://openalex.org/I204778367"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5046127916"],"corresponding_institution_ids":["https://openalex.org/I204778367"],"apc_list":null,"apc_paid":null,"fwci":0.3756,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.58342451,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10409","display_name":"Fuel Cells and Related Materials","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8251524567604065},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.6850067377090454},{"id":"https://openalex.org/keywords/model-predictive-control","display_name":"Model predictive control","score":0.6510240435600281},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6460583806037903},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.6339696645736694},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5848681926727295},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5742931365966797},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5736685991287231},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5444428324699402},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4835355281829834},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3699817359447479},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.32134124636650085},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.28236156702041626},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2653205394744873},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.21658027172088623}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8251524567604065},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.6850067377090454},{"id":"https://openalex.org/C172205157","wikidata":"https://www.wikidata.org/wiki/Q1782962","display_name":"Model predictive control","level":3,"score":0.6510240435600281},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6460583806037903},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.6339696645736694},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5848681926727295},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5742931365966797},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5736685991287231},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5444428324699402},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4835355281829834},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3699817359447479},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32134124636650085},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.28236156702041626},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2653205394744873},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21658027172088623},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/ecc57647.2023.10178119","is_oa":false,"landing_page_url":"http://dx.doi.org/10.23919/ecc57647.2023.10178119","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 European Control Conference (ECC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5400000214576721,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1982650427","https://openalex.org/W2112862244","https://openalex.org/W2155027007","https://openalex.org/W2156737235","https://openalex.org/W2586680856","https://openalex.org/W2787938642","https://openalex.org/W2901291754","https://openalex.org/W2930426397","https://openalex.org/W2948652605","https://openalex.org/W2963864421","https://openalex.org/W2992833799","https://openalex.org/W3100789280","https://openalex.org/W3138294267","https://openalex.org/W4225603049","https://openalex.org/W4226319110","https://openalex.org/W4285379000","https://openalex.org/W4285446252","https://openalex.org/W4296124382","https://openalex.org/W4302570325","https://openalex.org/W4313160026","https://openalex.org/W4394661925","https://openalex.org/W6683195989","https://openalex.org/W6683204974","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6748839928","https://openalex.org/W6756925679"],"related_works":["https://openalex.org/W2386410636","https://openalex.org/W2025663273","https://openalex.org/W3038962357","https://openalex.org/W3099153698","https://openalex.org/W1592209052","https://openalex.org/W1501190258","https://openalex.org/W3021988786","https://openalex.org/W2380199044","https://openalex.org/W3099311996","https://openalex.org/W2963971282"],"abstract_inverted_index":{"The":[0,63,126],"combination":[1],"of":[2,15,24,54,114,133,161],"Reinforcement":[3],"Learning":[4],"(RL)":[5],"and":[6],"Model":[7],"Predictive":[8],"Control":[9],"(MPC)":[10],"has":[11],"gained":[12],"a":[13,22,111],"lot":[14],"interest":[16],"in":[17],"the":[18,26,39,50,59,105,115,120,124,131,145,154],"recent":[19],"literature":[20],"as":[21,77,110,139],"way":[23],"computing":[25],"optimal":[27],"policies":[28],"from":[29,119],"MPC":[30,60,121,138],"schemes":[31],"based":[32],"on":[33],"inaccurate":[34],"models.":[35],"In":[36,98],"that":[37],"context,":[38],"Deterministic":[40],"Policy":[41],"Gradient":[42],"(DPG)":[43],"methods":[44,56,65,135],"are":[45,66],"often":[46,90],"observed":[47],"to":[48,57,69,103,149],"be":[49,150],"most":[51],"reliable":[52],"class":[53],"RL":[55],"improve":[58],"closed-loop":[61],"performance.":[62],"DPG":[64,134],"fairly":[67],"easy":[68],"formulate":[70],"when":[71],"used":[72],"with":[73,157],"compatible":[74],"function":[75,88,108,117,147],"approximation":[76,109],"an":[78,85],"advantage":[79],"function.":[80],"However,":[81],"this":[82,99],"formulation":[83],"requires":[84],"additional":[86,141],"value":[87,107,116,146],"approximation,":[89],"carried":[91],"out":[92],"using":[93],"Deep":[94],"Neural":[95],"Networks":[96],"(DNNs).":[97],"paper,":[100],"we":[101],"propose":[102],"estimate":[104,118],"required":[106],"first-order":[112],"expansion":[113],"scheme":[122],"providing":[123],"policy.":[125],"proposed":[127,155],"approach":[128,156],"drastically":[129],"simplifies":[130],"use":[132],"for":[136,143],"learning-based":[137],"no":[140],"structure":[142],"approximating":[144],"needs":[148],"constructed.":[151],"We":[152],"illustrate":[153],"two":[158],"numerical":[159],"examples":[160],"varying":[162],"complexity.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
