{"id":"https://openalex.org/W4415443934","doi":"https://doi.org/10.1109/tro.2025.3623769","title":"A Differential Dynamic Programming Framework for Inverse Reinforcement Learning","display_name":"A Differential Dynamic Programming Framework for Inverse Reinforcement Learning","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4415443934","doi":"https://doi.org/10.1109/tro.2025.3623769"},"language":null,"primary_location":{"id":"doi:10.1109/tro.2025.3623769","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tro.2025.3623769","pdf_url":null,"source":{"id":"https://openalex.org/S144620930","display_name":"IEEE Transactions on Robotics","issn_l":"1552-3098","issn":["1552-3098","1941-0468"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Robotics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kun Cao","orcid":"https://orcid.org/0000-0003-4688-1096"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kun Cao","raw_affiliation_strings":["Department of Control Science and Engineering, College of Electronics and Information Engineering, Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-4688-1096","affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, College of Electronics and Information Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064132698","display_name":"Xinhang Xu","orcid":"https://orcid.org/0009-0000-2957-7389"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xinhang Xu","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","School of Electrical and Electronic Engineering, Nanyang Technological University, 50 Nanyang Avenue, Singapore"],"raw_orcid":"https://orcid.org/0009-0000-2957-7389","affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, 50 Nanyang Avenue, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017350249","display_name":"Wanxin Jin","orcid":"https://orcid.org/0000-0001-5330-855X"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wanxin Jin","raw_affiliation_strings":["School for Engineering of Matter, Transport, and Energy, Arizona State University, Tempe, AZ, USA","School for Engineering of Matter, Transport, and Energy, Arizona State University, USA"],"raw_orcid":"https://orcid.org/0000-0001-5330-855X","affiliations":[{"raw_affiliation_string":"School for Engineering of Matter, Transport, and Energy, Arizona State University, Tempe, AZ, USA","institution_ids":["https://openalex.org/I55732556"]},{"raw_affiliation_string":"School for Engineering of Matter, Transport, and Energy, Arizona State University, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045975901","display_name":"Karl Henrik Johansson","orcid":"https://orcid.org/0000-0001-9940-5929"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Karl H. Johansson","raw_affiliation_strings":["Division of Decision and Control Systems, School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology, Stockholm, Sweden","Division of Decision and Control Systems, School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology, Sweden"],"raw_orcid":"https://orcid.org/0000-0001-9940-5929","affiliations":[{"raw_affiliation_string":"Division of Decision and Control Systems, School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"Division of Decision and Control Systems, School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100365448","display_name":"Lihua Xie","orcid":"https://orcid.org/0000-0002-7137-4136"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Lihua Xie","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","School of Electrical and Electronic Engineering, Nanyang Technological University, 50 Nanyang Avenue, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-7137-4136","affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, 50 Nanyang Avenue, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30781389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"41","issue":null,"first_page":"6267","last_page":"6286"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.8838000297546387,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.8838000297546387,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/equivalence","display_name":"Equivalence (formal languages)","score":0.5874999761581421},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.583299994468689},{"id":"https://openalex.org/keywords/inverse","display_name":"Inverse","score":0.5529000163078308},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5396000146865845},{"id":"https://openalex.org/keywords/differential-dynamic-programming","display_name":"Differential dynamic programming","score":0.5078999996185303},{"id":"https://openalex.org/keywords/differential","display_name":"Differential (mechanical device)","score":0.46309998631477356},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4043999910354614},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.398499995470047}],"concepts":[{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.5874999761581421},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.583299994468689},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5537999868392944},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.5529000163078308},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5460000038146973},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5396000146865845},{"id":"https://openalex.org/C92292787","wikidata":"https://www.wikidata.org/wiki/Q5275342","display_name":"Differential dynamic programming","level":3,"score":0.5078999996185303},{"id":"https://openalex.org/C93226319","wikidata":"https://www.wikidata.org/wiki/Q193137","display_name":"Differential (mechanical device)","level":2,"score":0.46309998631477356},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4043999910354614},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.398499995470047},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.38989999890327454},{"id":"https://openalex.org/C187523126","wikidata":"https://www.wikidata.org/wiki/Q17098330","display_name":"Inverse dynamics","level":3,"score":0.37059998512268066},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.36149999499320984},{"id":"https://openalex.org/C135252773","wikidata":"https://www.wikidata.org/wiki/Q1567213","display_name":"Inverse problem","level":2,"score":0.3522000014781952},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.3425999879837036},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.3248000144958496},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3181999921798706},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2996000051498413},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.2590000033378601}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tro.2025.3623769","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tro.2025.3623769","pdf_url":null,"source":{"id":"https://openalex.org/S144620930","display_name":"IEEE Transactions on Robotics","issn_l":"1552-3098","issn":["1552-3098","1941-0468"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Robotics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1304962232","display_name":null,"funder_award_id":"62503367","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2575069074","display_name":null,"funder_award_id":"62088101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6347876925","display_name":null,"funder_award_id":"RG64/23","funder_id":"https://openalex.org/F4320320751","funder_display_name":"Ministry of Education - Singapore"}],"funders":[{"id":"https://openalex.org/F4320320751","display_name":"Ministry of Education - Singapore","ror":"https://ror.org/01kcva023"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W586722081","https://openalex.org/W1528604627","https://openalex.org/W1562460111","https://openalex.org/W1587799944","https://openalex.org/W1622730090","https://openalex.org/W1977655452","https://openalex.org/W1978099898","https://openalex.org/W1980516134","https://openalex.org/W1999874108","https://openalex.org/W2027161089","https://openalex.org/W2031264011","https://openalex.org/W2031727844","https://openalex.org/W2069671092","https://openalex.org/W2107308405","https://openalex.org/W2130527887","https://openalex.org/W2158370675","https://openalex.org/W2169498096","https://openalex.org/W2737702598","https://openalex.org/W2737829489","https://openalex.org/W2765164677","https://openalex.org/W2774021736","https://openalex.org/W2907126380","https://openalex.org/W3004162504","https://openalex.org/W3038972086","https://openalex.org/W3081754649","https://openalex.org/W3123819824","https://openalex.org/W3156476459","https://openalex.org/W3172835839","https://openalex.org/W3204169784","https://openalex.org/W3205548345","https://openalex.org/W3205884071","https://openalex.org/W4213251304","https://openalex.org/W4221144238","https://openalex.org/W4282038053","https://openalex.org/W4289716892","https://openalex.org/W4293775970","https://openalex.org/W4362722548","https://openalex.org/W4386248264","https://openalex.org/W6922480057"],"related_works":[],"abstract_inverted_index":{"A":[0],"differential":[1],"dynamic":[2],"programming":[3],"(DDP)-based":[4],"framework":[5,43,92,129,164],"for":[6,36],"inverse":[7,55,134],"reinforcement":[8],"learning":[9,153],"(IRL)":[10],"is":[11,76,93,101,111,149,165],"introduced":[12],"to":[13,46,103,113,131],"recover":[14],"the":[15,18,37,49,53,65,105,117,126,152,159,181,186,190],"parameters":[16,154],"in":[17,52],"cost":[19],"function,":[20,88],"system":[21],"dynamics,":[22],"and":[23,59,67,144,173,184],"constraints":[24],"from":[25,28,158],"demonstrations.":[26,109],"Different":[27],"existing":[29,68],"work,":[30],"where":[31],"DDP":[32],"was":[33],"usually":[34],"used":[35,119],"inner":[38],"forward":[39],"problem,":[40],"our":[41],"proposed":[42,66,102,163],"uses":[44],"it":[45,148],"efficiently":[47],"compute":[48],"gradient":[50],"required":[51],"outer":[54],"problem":[56,137],"with":[57,84],"equality":[58],"inequality":[60],"constraints.":[61],"The":[62,162,178],"equivalence":[63],"between":[64],"methods":[69],"based":[70],"on":[71],"Pontryagin's":[72],"Maximum":[73],"Principle":[74],"(PMP)":[75],"established.":[77],"More":[78],"importantly,":[79],"using":[80],"this":[81,96],"DDP-based":[82],"IRL":[83,91,128],"an":[85],"open-loop":[86,120],"loss":[87,99,121],"a":[89,98,132,145],"closed-loop":[90,106,127],"presented.":[94],"In":[95],"framework,":[97],"function":[100],"capture":[104],"nature":[107],"of":[108,189],"It":[110],"shown":[112],"be":[114,156],"better":[115],"than":[116],"commonly":[118],"function.":[122],"We":[123],"show":[124],"that":[125,151],"reduces":[130],"constrained":[133],"optimal":[135],"control":[136],"under":[138],"certain":[139],"assumptions.":[140],"Under":[141],"these":[142],"assumptions":[143],"rank":[146],"condition,":[147],"proven":[150],"can":[155],"recovered":[157],"demonstration":[160],"data.":[161],"extensively":[166],"evaluated":[167],"through":[168],"four":[169],"numerical":[170],"robot":[171],"examples":[172],"one":[174],"real-world":[175],"quadrotor":[176],"system.":[177],"experiments":[179],"validate":[180],"theoretical":[182],"results":[183],"illustrate":[185],"practical":[187],"relevance":[188],"approach.":[191]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-24T00:00:00"}
