{"id":"https://openalex.org/W3153125130","doi":"https://doi.org/10.1109/cdc45484.2021.9683494","title":"Inverse Reinforcement Learning: A Control Lyapunov Approach","display_name":"Inverse Reinforcement Learning: A Control Lyapunov Approach","publication_year":2021,"publication_date":"2021-12-14","ids":{"openalex":"https://openalex.org/W3153125130","doi":"https://doi.org/10.1109/cdc45484.2021.9683494","mag":"3153125130"},"language":"en","primary_location":{"id":"doi:10.1109/cdc45484.2021.9683494","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc45484.2021.9683494","pdf_url":null,"source":{"id":"https://openalex.org/S4363607724","display_name":"2021 60th IEEE Conference on Decision and Control (CDC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 60th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.04483","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053407388","display_name":"Samuel Tesfazgi","orcid":"https://orcid.org/0009-0000-7298-6073"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Samuel Tesfazgi","raw_affiliation_strings":["Technical University of Munich,Department of Electrical and Computer Engineering,Munich,Germany,80333"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich,Department of Electrical and Computer Engineering,Munich,Germany,80333","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023397248","display_name":"Armin Lederer","orcid":"https://orcid.org/0000-0001-6263-5608"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Armin Lederer","raw_affiliation_strings":["Technical University of Munich,Department of Electrical and Computer Engineering,Munich,Germany,80333"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich,Department of Electrical and Computer Engineering,Munich,Germany,80333","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024376647","display_name":"Sandra Hirche","orcid":"https://orcid.org/0000-0001-7819-5926"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sandra Hirche","raw_affiliation_strings":["Technical University of Munich,Department of Electrical and Computer Engineering,Munich,Germany,80333","Technical University of Munich"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich,Department of Electrical and Computer Engineering,Munich,Germany,80333","institution_ids":["https://openalex.org/I62916508"]},{"raw_affiliation_string":"Technical University of Munich","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5053407388"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":0.12261971,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.30514809,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"3627","last_page":"3632"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9764000177383423,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyapunov-function","display_name":"Lyapunov function","score":0.6258651614189148},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.6164204478263855},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6068955659866333},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5607321262359619},{"id":"https://openalex.org/keywords/control-lyapunov-function","display_name":"Control-Lyapunov function","score":0.5359317660331726},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.529167890548706},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5161967873573303},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.51343834400177},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.49889683723449707},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4634523093700409},{"id":"https://openalex.org/keywords/inverse","display_name":"Inverse","score":0.4614812731742859},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.42486053705215454},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.423037588596344},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3921279311180115},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.307578980922699},{"id":"https://openalex.org/keywords/lyapunov-redesign","display_name":"Lyapunov redesign","score":0.297490656375885},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24712568521499634},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2381424903869629},{"id":"https://openalex.org/keywords/lyapunov-exponent","display_name":"Lyapunov exponent","score":0.15882685780525208},{"id":"https://openalex.org/keywords/chaotic","display_name":"Chaotic","score":0.1469399333000183},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13622528314590454},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.07330211997032166}],"concepts":[{"id":"https://openalex.org/C60640748","wikidata":"https://www.wikidata.org/wiki/Q2337858","display_name":"Lyapunov function","level":3,"score":0.6258651614189148},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.6164204478263855},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6068955659866333},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5607321262359619},{"id":"https://openalex.org/C201030206","wikidata":"https://www.wikidata.org/wiki/Q5165805","display_name":"Control-Lyapunov function","level":5,"score":0.5359317660331726},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.529167890548706},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5161967873573303},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.51343834400177},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.49889683723449707},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4634523093700409},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.4614812731742859},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.42486053705215454},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.423037588596344},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3921279311180115},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.307578980922699},{"id":"https://openalex.org/C37935115","wikidata":"https://www.wikidata.org/wiki/Q6707085","display_name":"Lyapunov redesign","level":4,"score":0.297490656375885},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24712568521499634},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2381424903869629},{"id":"https://openalex.org/C191544260","wikidata":"https://www.wikidata.org/wiki/Q1238630","display_name":"Lyapunov exponent","level":3,"score":0.15882685780525208},{"id":"https://openalex.org/C2777052490","wikidata":"https://www.wikidata.org/wiki/Q5072826","display_name":"Chaotic","level":2,"score":0.1469399333000183},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13622528314590454},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.07330211997032166},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/cdc45484.2021.9683494","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc45484.2021.9683494","pdf_url":null,"source":{"id":"https://openalex.org/S4363607724","display_name":"2021 60th IEEE Conference on Decision and Control (CDC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 60th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.04483","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.04483","pdf_url":"https://arxiv.org/pdf/2104.04483","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3153125130","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2104.04483.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2104.04483","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.04483","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.04483","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.04483","pdf_url":"https://arxiv.org/pdf/2104.04483","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1540155273","https://openalex.org/W1746819321","https://openalex.org/W1970832114","https://openalex.org/W1991667713","https://openalex.org/W1999874108","https://openalex.org/W2001951765","https://openalex.org/W2004412503","https://openalex.org/W2012487366","https://openalex.org/W2012525071","https://openalex.org/W2098774185","https://openalex.org/W2136719407","https://openalex.org/W2168029744","https://openalex.org/W2298163601","https://openalex.org/W2604382266","https://openalex.org/W2735248836","https://openalex.org/W2913243980","https://openalex.org/W2963590100","https://openalex.org/W3012086497","https://openalex.org/W3098713169","https://openalex.org/W3101749733","https://openalex.org/W3155247195","https://openalex.org/W6674884181","https://openalex.org/W6696380822","https://openalex.org/W6758804334"],"related_works":["https://openalex.org/W2984409990","https://openalex.org/W3130596985","https://openalex.org/W3207632880","https://openalex.org/W3003474222","https://openalex.org/W2211252706","https://openalex.org/W2202743478","https://openalex.org/W2995634437","https://openalex.org/W3152815381","https://openalex.org/W181569489","https://openalex.org/W2031067035","https://openalex.org/W2753088790","https://openalex.org/W2117675763","https://openalex.org/W2562989799","https://openalex.org/W2892172348","https://openalex.org/W2100401322","https://openalex.org/W3131283938","https://openalex.org/W3130800560","https://openalex.org/W3082495008","https://openalex.org/W3200276130","https://openalex.org/W2397557326"],"abstract_inverted_index":{"Inferring":[0],"the":[1,30,38,69,83,99,112],"intent":[2,58],"of":[3,32,106,114],"an":[4,51],"intelligent":[5],"agent":[6],"from":[7,79,120],"demonstrations":[8,80,123],"and":[9,59],"subsequently":[10],"predicting":[11],"its":[12,57,61],"behavior,":[13],"is":[14,29,45,92],"a":[15,42,94,125],"critical":[16],"task":[17],"in":[18,124],"many":[19],"collaborative":[20],"settings.":[21],"A":[22],"common":[23],"approach":[24],"to":[25,47,50,73],"solve":[26],"this":[27,65],"problem":[28,72],"framework":[31],"inverse":[33,84],"reinforcement":[34],"learning":[35,74,119],"(IRL),":[36],"where":[37],"observed":[39],"agent,":[40],"e.g.,":[41],"human":[43],"demonstrator,":[44],"assumed":[46],"behave":[48],"according":[49],"intrinsic":[52],"cost":[53],"function":[54],"that":[55,89],"reflects":[56],"informs":[60],"control":[62,75,108],"actions.":[63],"In":[64],"work,":[66],"we":[67],"reformulate":[68],"IRL":[70],"inference":[71],"Lyapunov":[76],"functions":[77],"(CLF)":[78],"by":[81,118],"exploiting":[82],"optimality":[85],"property,":[86],"which":[87],"states":[88],"every":[90],"CLF":[91,101],"also":[93],"meaningful":[95],"value":[96],"function.":[97],"Moreover,":[98],"derived":[100],"formulation":[102],"directly":[103],"guarantees":[104],"stability":[105],"inferred":[107],"policies.":[109],"We":[110],"show":[111],"flexibility":[113],"our":[115],"proposed":[116],"method":[117],"goal-directed":[121],"movement":[122],"continuous":[126],"environment.":[127]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
