{"id":"https://openalex.org/W4415179585","doi":"https://doi.org/10.23919/ecc65951.2025.11187042","title":"Convergent NMPC-based Reinforcement Learning Using Deep Expected Sarsa and Nonlinear Temporal Difference Learning","display_name":"Convergent NMPC-based Reinforcement Learning Using Deep Expected Sarsa and Nonlinear Temporal Difference Learning","publication_year":2025,"publication_date":"2025-06-24","ids":{"openalex":"https://openalex.org/W4415179585","doi":"https://doi.org/10.23919/ecc65951.2025.11187042"},"language":"en","primary_location":{"id":"doi:10.23919/ecc65951.2025.11187042","is_oa":false,"landing_page_url":"https://doi.org/10.23919/ecc65951.2025.11187042","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 European Control Conference (ECC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-04983821v1/file/Salaje%20et%20al.%20-%20ECC%202025.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107099068","display_name":"Amine Salaje","orcid":"https://orcid.org/0009-0002-8546-5351"},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]},{"id":"https://openalex.org/I2802505300","display_name":"\u00c9cole Sup\u00e9rieure d'Ing\u00e9nieurs en G\u00e9nie \u00c9lectrique","ror":"https://ror.org/01apwkd48","country_code":"FR","type":"education","lineage":["https://openalex.org/I2802505300"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Amine Salaje","raw_affiliation_strings":["Universite de Rouen, ESIGELEC, IRSEEM,Rouen,France,76000"],"affiliations":[{"raw_affiliation_string":"Universite de Rouen, ESIGELEC, IRSEEM,Rouen,France,76000","institution_ids":["https://openalex.org/I62396329","https://openalex.org/I2802505300"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099137379","display_name":"Thomas Chevet","orcid":null},"institutions":[{"id":"https://openalex.org/I2802505300","display_name":"\u00c9cole Sup\u00e9rieure d'Ing\u00e9nieurs en G\u00e9nie \u00c9lectrique","ror":"https://ror.org/01apwkd48","country_code":"FR","type":"education","lineage":["https://openalex.org/I2802505300"]},{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Thomas Chevet","raw_affiliation_strings":["Universite de Rouen, ESIGELEC, IRSEEM,Rouen,France,76000"],"affiliations":[{"raw_affiliation_string":"Universite de Rouen, ESIGELEC, IRSEEM,Rouen,France,76000","institution_ids":["https://openalex.org/I62396329","https://openalex.org/I2802505300"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020981126","display_name":"Nicolas Langlois","orcid":"https://orcid.org/0000-0003-1712-0677"},"institutions":[{"id":"https://openalex.org/I62396329","display_name":"Universit\u00e9 de Rouen Normandie","ror":"https://ror.org/03nhjew95","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210105918","https://openalex.org/I62396329"]},{"id":"https://openalex.org/I2802505300","display_name":"\u00c9cole Sup\u00e9rieure d'Ing\u00e9nieurs en G\u00e9nie \u00c9lectrique","ror":"https://ror.org/01apwkd48","country_code":"FR","type":"education","lineage":["https://openalex.org/I2802505300"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Nicolas Langlois","raw_affiliation_strings":["Universite de Rouen, ESIGELEC, IRSEEM,Rouen,France,76000"],"affiliations":[{"raw_affiliation_string":"Universite de Rouen, ESIGELEC, IRSEEM,Rouen,France,76000","institution_ids":["https://openalex.org/I62396329","https://openalex.org/I2802505300"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5107099068"],"corresponding_institution_ids":["https://openalex.org/I2802505300","https://openalex.org/I62396329"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14884077,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2644","last_page":"2649"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.27790001034736633,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.27790001034736633,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.2676999866962433,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7742999792098999},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.6847000122070312},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6189000010490417},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.6025000214576721},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.5468999743461609},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.5368000268936157},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.5264000296592712},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.48240000009536743}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7742999792098999},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.6847000122070312},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6189000010490417},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.6025000214576721},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.5468999743461609},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.5368000268936157},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.5264000296592712},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5044999718666077},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.48240000009536743},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.4778999984264374},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4438000023365021},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4172999858856201},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3970000147819519},{"id":"https://openalex.org/C148043351","wikidata":"https://www.wikidata.org/wiki/Q4456944","display_name":"Current (fluid)","level":2,"score":0.36739999055862427},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C172205157","wikidata":"https://www.wikidata.org/wiki/Q1782962","display_name":"Model predictive control","level":3,"score":0.36090001463890076},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32739999890327454},{"id":"https://openalex.org/C207821765","wikidata":"https://www.wikidata.org/wiki/Q405372","display_name":"Instability","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2702000141143799},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.2563000023365021}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.23919/ecc65951.2025.11187042","is_oa":false,"landing_page_url":"https://doi.org/10.23919/ecc65951.2025.11187042","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 European Control Conference (ECC)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-04983821v1","is_oa":true,"landing_page_url":"https://hal.science/hal-04983821","pdf_url":"https://hal.science/hal-04983821v1/file/Salaje%20et%20al.%20-%20ECC%202025.pdf","source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"European Control Conference, Jun 2025, Thessaloniki, Greece. pp.2644-2649, &#x27E8;10.23919/ECC65951.2025.11187042&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-04983821v1","is_oa":true,"landing_page_url":"https://hal.science/hal-04983821","pdf_url":"https://hal.science/hal-04983821v1/file/Salaje%20et%20al.%20-%20ECC%202025.pdf","source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"European Control Conference, Jun 2025, Thessaloniki, Greece. pp.2644-2649, &#x27E8;10.23919/ECC65951.2025.11187042&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2514272077","display_name":null,"funder_award_id":"ANR-20-THIA-0021","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G6238311721","display_name":null,"funder_award_id":"HAISCoDe","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320329938","display_name":"R\u00e9gion Normandie","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415179585.pdf","grobid_xml":"https://content.openalex.org/works/W4415179585.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,72,122],"present":[4,154],"a":[5,45,57,63,129,133,169],"learning-based":[6],"nonlinear":[7],"model":[8],"predictive":[9],"controller":[10,36],"(NMPC)":[11],"using":[12],"an":[13],"original":[14],"reinforcement":[15],"learning":[16,100],"(RL)":[17],"method":[18,141],"to":[19,69,74,92,142,168],"learn":[20],"the":[21,25,35,40,50,75,78,82,88,94,99,104,107,109,118,137,144,156,164],"optimal":[22,171],"weights":[23],"of":[24,44,81,106,136],"NMPC":[26,131],"scheme,":[27],"for":[28],"which":[29],"two":[30],"methods":[31,127],"are":[32,153],"proposed.":[33],"Firstly,":[34],"is":[37,60,90,113],"used":[38],"as":[39,132],"current":[41,79],"action-value":[42,52,95],"function":[43,96,134,157],"deep":[46],"Expected":[47,138],"Sarsa":[48,139],"where":[49],"subsequent":[51],"function,":[53],"usually":[54],"obtained":[55],"with":[56,62,103,128],"secondary":[58],"NMPC,":[59],"approximated":[61],"neural":[64],"network":[65,89],"(NN).":[66],"With":[67],"respect":[68],"existing":[70],"methods,":[71],"add":[73],"NN\u2019s":[76],"input":[77],"value":[80],"NMPC\u2019s":[83],"learned":[84],"parameters":[85],"so":[86],"that":[87,163],"able":[91],"approximate":[93],"and":[97,148],"stabilize":[98],"performance.":[101,120],"Additionally,":[102],"use":[105],"NN,":[108],"real-time":[110],"computational":[111],"burden":[112],"approximately":[114],"halved":[115],"without":[116,173],"affecting":[117],"closed-loop":[119],"Secondly,":[121],"combine":[123],"gradient":[124],"temporal":[125],"difference":[126],"parametrized":[130],"approximator":[135],"RL":[140],"overcome":[143],"potential":[145],"parameters\u2019":[146],"divergence":[147],"instability":[149,174],"issues":[150],"when":[151],"nonlinearities":[152],"in":[155],"approximation.":[158],"The":[159],"simulation":[160],"results":[161],"show":[162],"proposed":[165],"approach":[166],"converges":[167],"locally":[170],"solution":[172],"problems.":[175]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
