{"id":"https://openalex.org/W7123348856","doi":"https://doi.org/10.1109/cdc57313.2025.11312015","title":"MAD: A Magnitude And Direction Policy Parametrization for Stability Constrained Reinforcement Learning","display_name":"MAD: A Magnitude And Direction Policy Parametrization for Stability Constrained Reinforcement Learning","publication_year":2025,"publication_date":"2025-12-09","ids":{"openalex":"https://openalex.org/W7123348856","doi":"https://doi.org/10.1109/cdc57313.2025.11312015"},"language":"en","primary_location":{"id":"doi:10.1109/cdc57313.2025.11312015","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc57313.2025.11312015","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 64th Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://infoscience.epfl.ch/handle/20.500.14299/258046","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045054592","display_name":"Luca Furieri","orcid":"https://orcid.org/0000-0001-6103-4480"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Luca Furieri","raw_affiliation_strings":["&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering"],"affiliations":[{"raw_affiliation_string":"&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030250253","display_name":"Sucheth Shenoy","orcid":"https://orcid.org/0000-0002-0600-8439"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Sucheth Shenoy","raw_affiliation_strings":["&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering"],"affiliations":[{"raw_affiliation_string":"&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042667379","display_name":"Danilo Saccani","orcid":"https://orcid.org/0000-0002-3059-9160"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Danilo Saccani","raw_affiliation_strings":["&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering"],"affiliations":[{"raw_affiliation_string":"&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080064387","display_name":"Andrea Martin","orcid":"https://orcid.org/0000-0003-3460-0840"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Andrea Martin","raw_affiliation_strings":["&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering"],"affiliations":[{"raw_affiliation_string":"&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5121015449","display_name":"Giancarlo Ferrari-Trecate","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Giancarlo Ferrari-Trecate","raw_affiliation_strings":["&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering"],"affiliations":[{"raw_affiliation_string":"&#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Institute of Mechanical Engineering","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5045054592"],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.77579568,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"942","last_page":"947"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.6872000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.6872000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.15449999272823334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.03060000017285347,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8208000063896179},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.7001000046730042},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.546500027179718},{"id":"https://openalex.org/keywords/parametrization","display_name":"Parametrization (atmospheric modeling)","score":0.5073999762535095},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.49639999866485596},{"id":"https://openalex.org/keywords/magnitude","display_name":"Magnitude (astronomy)","score":0.49059998989105225},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.46810001134872437},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.42750000953674316},{"id":"https://openalex.org/keywords/completeness","display_name":"Completeness (order theory)","score":0.4235000014305115}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8208000063896179},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.7001000046730042},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.546500027179718},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.516700029373169},{"id":"https://openalex.org/C202887219","wikidata":"https://www.wikidata.org/wiki/Q3895221","display_name":"Parametrization (atmospheric modeling)","level":3,"score":0.5073999762535095},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.49639999866485596},{"id":"https://openalex.org/C126691448","wikidata":"https://www.wikidata.org/wiki/Q2028919","display_name":"Magnitude (astronomy)","level":2,"score":0.49059998989105225},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.46810001134872437},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.42750000953674316},{"id":"https://openalex.org/C17231256","wikidata":"https://www.wikidata.org/wiki/Q5156540","display_name":"Completeness (order theory)","level":2,"score":0.4235000014305115},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.39730000495910645},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3885999917984009},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3668000102043152},{"id":"https://openalex.org/C167964875","wikidata":"https://www.wikidata.org/wiki/Q17011487","display_name":"Exponential stability","level":3,"score":0.3546000123023987},{"id":"https://openalex.org/C12843","wikidata":"https://www.wikidata.org/wiki/Q201721","display_name":"Gravitational singularity","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.33219999074935913},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.32580000162124634},{"id":"https://openalex.org/C79379906","wikidata":"https://www.wikidata.org/wiki/Q3174497","display_name":"Dynamical systems theory","level":2,"score":0.31380000710487366},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.30000001192092896},{"id":"https://openalex.org/C143170015","wikidata":"https://www.wikidata.org/wiki/Q17007850","display_name":"Stability conditions","level":3,"score":0.29600000381469727},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C33962884","wikidata":"https://www.wikidata.org/wiki/Q378637","display_name":"Dynamical system (definition)","level":3,"score":0.28870001435279846},{"id":"https://openalex.org/C176321772","wikidata":"https://www.wikidata.org/wiki/Q1430640","display_name":"Numerical stability","level":3,"score":0.2833999991416931},{"id":"https://openalex.org/C200288055","wikidata":"https://www.wikidata.org/wiki/Q2621792","display_name":"Element (criminal law)","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.27790001034736633},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2563000023365021}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/cdc57313.2025.11312015","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc57313.2025.11312015","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 64th Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},{"id":"pmh:oai:infoscience.epfl.ch:20.500.14299/258046","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/258046","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference proceedings"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:20.500.14299/258046","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/258046","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference proceedings"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.48877665400505066,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,112],"introduce":[1,57,132],"magnitude":[2,85],"and":[3,38],"direction":[4,102],"(MAD)":[5],"policies,":[6],"a":[7,64,93,108],"policy":[8,128,161],"parameterization":[9],"for":[10,21],"reinforcement":[11,71],"learning":[12,72],"(RL)":[13],"that":[14,154],"preserves":[15],"\u2113<inf":[16,49,95],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[17,50,96],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">p</inf>":[18],"closed-loop":[19,77,141,180],"stability":[20,117,142,181],"nonlinear":[22,36],"dynamical":[23],"systems.":[24],"Despite":[25],"their":[26],"completeness":[27],"in":[28],"describing":[29],"all":[30],"stabilizing":[31],"controllers,":[32],"methods":[33,164],"based":[34,103],"on":[35,60,104],"Youla":[37],"system-level":[39],"synthesis":[40],"are":[41],"significantly":[42],"impacted":[43],"by":[44,82,92,182],"the":[45,68,84,87,115,171],"difficulty":[46],"of":[47,70,86,119,173],"parametrizing":[48],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">p</inf>-stable":[51,97],"operators.":[52],"In":[53],"contrast,":[54],"MAD":[55,120,130,155],"policies":[56,121,131,156,177],"explicit":[58],"feedback":[59],"state-dependent":[61,105],"features":[62,106],"\u2013":[63,74,169],"key":[65],"element":[66],"behind":[67],"success":[69],"pipelines":[73],"without":[75],"jeopardizing":[76],"stability.":[78,150],"This":[79],"is":[80],"achieved":[81],"letting":[83],"control":[88],"input":[89],"be":[90],"described":[91],"disturbance-feedback":[94,127],"operator,":[98],"while":[99,178],"selecting":[100],"its":[101],"through":[107],"universal":[109],"function":[110],"approximator.":[111],"further":[113],"characterize":[114],"robust":[116],"properties":[118],"under":[122],"model":[123,145],"mismatch.":[124],"Unlike":[125],"existing":[126],"parametrizations,":[129],"state-feedback":[133],"components":[134],"compatible":[135],"with":[136,143,158],"model-free":[137],"RL":[138],"pipelines,":[139],"ensuring":[140],"no":[144],"information":[146],"beyond":[147],"assuming":[148],"open-loop":[149],"Numerical":[151],"experiments":[152],"show":[153],"trained":[157],"deep":[159],"deterministic":[160],"gradient":[162],"(DDPG)":[163],"generalize":[165],"to":[166],"unseen":[167],"scenarios":[168],"matching":[170],"performance":[172],"standard":[174],"neural":[175],"network":[176],"guaranteeing":[179],"design.":[183]},"counts_by_year":[],"updated_date":"2026-02-23T20:09:44.859080","created_date":"2026-01-14T00:00:00"}
