{"id":"https://openalex.org/W4415594416","doi":"https://doi.org/10.1109/tie.2025.3603082","title":"Chaos-Augmented Reinforcement Learning With Diffusion Models for Robust Legged Robot Locomotion","display_name":"Chaos-Augmented Reinforcement Learning With Diffusion Models for Robust Legged Robot Locomotion","publication_year":2025,"publication_date":"2025-10-27","ids":{"openalex":"https://openalex.org/W4415594416","doi":"https://doi.org/10.1109/tie.2025.3603082"},"language":null,"primary_location":{"id":"doi:10.1109/tie.2025.3603082","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tie.2025.3603082","pdf_url":null,"source":{"id":"https://openalex.org/S58031724","display_name":"IEEE Transactions on Industrial Electronics","issn_l":"0278-0046","issn":["0278-0046","1557-9948"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Industrial Electronics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023837084","display_name":"Hainan Zhang","orcid":"https://orcid.org/0009-0003-9323-0613"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hainan Zhang","raw_affiliation_strings":["Electrical Engineering, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0009-0003-9323-0613","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100380201","display_name":"Changchun Hua","orcid":"https://orcid.org/0000-0001-6311-2112"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changchun Hua","raw_affiliation_strings":["Electrical Engineering, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0000-0001-6311-2112","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100758850","display_name":"Jiannan Chen","orcid":"https://orcid.org/0000-0003-2106-4743"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiannan Chen","raw_affiliation_strings":["Electrical Engineering, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0000-0003-2106-4743","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048529905","display_name":"Xi Luo","orcid":"https://orcid.org/0000-0002-6251-0634"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xi Luo","raw_affiliation_strings":["Electrical Engineering, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0000-0002-6251-0634","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112084053","display_name":"Jing Wei","orcid":"https://orcid.org/0009-0000-2350-922X"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Wei","raw_affiliation_strings":["Electrical Engineering, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0009-0000-2350-922X","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5023837084"],"corresponding_institution_ids":["https://openalex.org/I39333907"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28469925,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"73","issue":"2","first_page":"2600","last_page":"2609"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11023","display_name":"Prosthetics and Rehabilitation Robotics","score":0.9488999843597412,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10571","display_name":"Robotic Mechanisms and Dynamics","score":0.9391000270843506,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8216000199317932},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.4851999878883362},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.4544000029563904},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4447000026702881},{"id":"https://openalex.org/keywords/chaotic","display_name":"Chaotic","score":0.42980000376701355},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4262000024318695},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4174000024795532},{"id":"https://openalex.org/keywords/lyapunov-exponent","display_name":"Lyapunov exponent","score":0.3846000134944916},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.3806999921798706}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8216000199317932},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6265000104904175},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.4851999878883362},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.4544000029563904},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4447000026702881},{"id":"https://openalex.org/C2777052490","wikidata":"https://www.wikidata.org/wiki/Q5072826","display_name":"Chaotic","level":2,"score":0.42980000376701355},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4262000024318695},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4174000024795532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4097000062465668},{"id":"https://openalex.org/C191544260","wikidata":"https://www.wikidata.org/wiki/Q1238630","display_name":"Lyapunov exponent","level":3,"score":0.3846000134944916},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3806999921798706},{"id":"https://openalex.org/C2776829284","wikidata":"https://www.wikidata.org/wiki/Q1341651","display_name":"Lyapunov stability","level":3,"score":0.3578000068664551},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3555999994277954},{"id":"https://openalex.org/C2779908020","wikidata":"https://www.wikidata.org/wiki/Q1424704","display_name":"Legged robot","level":3,"score":0.35089999437332153},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.3508000075817108},{"id":"https://openalex.org/C71134354","wikidata":"https://www.wikidata.org/wiki/Q458825","display_name":"Kernel density estimation","level":3,"score":0.34950000047683716},{"id":"https://openalex.org/C60640748","wikidata":"https://www.wikidata.org/wiki/Q2337858","display_name":"Lyapunov function","level":3,"score":0.3481000065803528},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.34130001068115234},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.3379000127315521},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30959999561309814},{"id":"https://openalex.org/C106195933","wikidata":"https://www.wikidata.org/wiki/Q7847935","display_name":"Truncation (statistics)","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C95546049","wikidata":"https://www.wikidata.org/wiki/Q1345207","display_name":"Entropy estimation","level":3,"score":0.2770000100135803},{"id":"https://openalex.org/C116672817","wikidata":"https://www.wikidata.org/wiki/Q1454986","display_name":"Physical system","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C112401455","wikidata":"https://www.wikidata.org/wiki/Q178036","display_name":"Brownian motion","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26919999718666077},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.26499998569488525},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C79487989","wikidata":"https://www.wikidata.org/wiki/Q934680","display_name":"Vehicle dynamics","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tie.2025.3603082","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tie.2025.3603082","pdf_url":null,"source":{"id":"https://openalex.org/S58031724","display_name":"IEEE Transactions on Industrial Electronics","issn_l":"0278-0046","issn":["0278-0046","1557-9948"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Industrial Electronics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G160074969","display_name":null,"funder_award_id":"U24A20271","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2254257675","display_name":null,"funder_award_id":"62403183","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5417780894","display_name":null,"funder_award_id":"U22A2050","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2238345790","https://openalex.org/W2766447205","https://openalex.org/W3094402048","https://openalex.org/W3112664346","https://openalex.org/W3162439934","https://openalex.org/W3170422092","https://openalex.org/W3182751084","https://openalex.org/W4286361939","https://openalex.org/W4295832664","https://openalex.org/W4381415941","https://openalex.org/W4386285856","https://openalex.org/W4386718967","https://openalex.org/W4388952994","https://openalex.org/W4394674699","https://openalex.org/W4395680426","https://openalex.org/W4396910086","https://openalex.org/W4401567594","https://openalex.org/W4415797647","https://openalex.org/W6922480057"],"related_works":[],"abstract_inverted_index":{"Diffusion-based":[0],"reinforcement":[1,120],"learning":[2,121],"demonstrates":[3],"superior":[4],"performance":[5],"in":[6],"handling":[7],"multimodal":[8,23,63],"tasks.":[9],"However,":[10],"conventional":[11],"critic":[12],"networks":[13],"relying":[14],"on":[15,167,173,187],"unimodal":[16],"return":[17],"estimation":[18,73],"struggle":[19],"to":[20,60,92,140,148],"capture":[21],"the":[22,29,102,118,137,149,160,168],"nature":[24],"of":[25,31,106,159],"diffusion":[26],"policies,":[27],"while":[28],"sparsity":[30],"high-quality":[32,94],"action":[33,95],"samples":[34],"further":[35],"limits":[36],"policy":[37,71,77,138],"optimization.":[38],"To":[39],"address":[40],"these":[41],"challenges,":[42],"first,":[43],"a":[44,80,109,174],"novel":[45],"Kernel":[46],"Density":[47],"Expected":[48],"Advantage":[49],"Estimation":[50],"(KDEAE)":[51],"approach":[52],"is":[53,115,163,184],"proposed,":[54],"which":[55],"adaptively":[56],"models":[57],"Q-value":[58],"distributions":[59],"fit":[61],"complex":[62],"and":[64,84,90,133,171],"skewed":[65],"data.":[66],"This":[67],"enables":[68],"more":[69],"accurate":[70],"gradient":[72],"that":[74],"significantly":[75],"improves":[76],"expressiveness.":[78],"Additionally,":[79],"soft":[81],"truncation":[82],"mechanism":[83],"maximum":[85,110],"entropy":[86],"framework":[87],"balance":[88],"exploration":[89],"exploitation":[91],"expand":[93],"coverage,":[96],"alleviating":[97],"sample":[98],"sparsity.":[99],"Furthermore,":[100],"targeting":[101],"inherent":[103],"dynamic":[104],"instability":[105],"legged":[107],"robots,":[108],"Lyapunov":[111],"exponent":[112],"regularization":[113],"method":[114,124],"proposed":[116,161],"within":[117],"model-free":[119],"framework.":[122],"The":[123,157,178],"estimates":[125],"system":[126,155],"chaotic":[127],"dynamics":[128],"via":[129],"state-space":[130],"experience":[131],"replay":[132],"incorporates":[134],"it":[135],"into":[136],"loss":[139],"enhance":[141],"robustness.":[142],"Finally,":[143],"safety":[144],"constraints":[145],"are":[146],"applied":[147],"Q-function":[150],"through":[151,165],"Lagrangian":[152],"regularization,":[153],"improving":[154],"safety.":[156],"effectiveness":[158],"algorithm":[162],"validated":[164],"simulations":[166],"MuJoCo":[169],"platform":[170],"experiments":[172],"physical":[175],"robotic":[176],"platform.":[177],"relevant":[179],"code":[180],"for":[181],"this":[182],"study":[183],"publicly":[185],"available":[186],"GitHub:":[188],"<uri":[189],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[190],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/zhn-ya/diffusion-RL</uri>.":[191]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-28T00:00:00"}
