{"id":"https://openalex.org/W4416748605","doi":"https://doi.org/10.1109/iros60139.2025.11245929","title":"Distillation-PPO: A Novel Two-Stage Reinforcement Learning Framework for Humanoid Robot Perceptive Locomotion","display_name":"Distillation-PPO: A Novel Two-Stage Reinforcement Learning Framework for Humanoid Robot Perceptive Locomotion","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416748605","doi":"https://doi.org/10.1109/iros60139.2025.11245929"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11245929","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245929","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100381869","display_name":"Qiang Zhang","orcid":"https://orcid.org/0000-0001-8519-5158"},"institutions":[{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qiang Zhang","raw_affiliation_strings":["Beijing Innovation Center of Humanoid Robotics Co. Ltd"],"affiliations":[{"raw_affiliation_string":"Beijing Innovation Center of Humanoid Robotics Co. Ltd","institution_ids":["https://openalex.org/I4210165198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100899705","display_name":"Gang Han","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Han","raw_affiliation_strings":["Beijing Innovation Center of Humanoid Robotics Co. Ltd"],"affiliations":[{"raw_affiliation_string":"Beijing Innovation Center of Humanoid Robotics Co. Ltd","institution_ids":["https://openalex.org/I4210165198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049574394","display_name":"Jingkai Sun","orcid":"https://orcid.org/0000-0002-1032-2957"},"institutions":[{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingkai Sun","raw_affiliation_strings":["Beijing Innovation Center of Humanoid Robotics Co. Ltd"],"affiliations":[{"raw_affiliation_string":"Beijing Innovation Center of Humanoid Robotics Co. Ltd","institution_ids":["https://openalex.org/I4210165198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111526799","display_name":"Wen Zhao","orcid":"https://orcid.org/0009-0002-9728-3950"},"institutions":[{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Zhao","raw_affiliation_strings":["Beijing Innovation Center of Humanoid Robotics Co. Ltd"],"affiliations":[{"raw_affiliation_string":"Beijing Innovation Center of Humanoid Robotics Co. Ltd","institution_ids":["https://openalex.org/I4210165198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000895872","display_name":"Chenghao Sun","orcid":"https://orcid.org/0009-0002-7158-400X"},"institutions":[{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenghao Sun","raw_affiliation_strings":["Beijing Innovation Center of Humanoid Robotics Co. Ltd"],"affiliations":[{"raw_affiliation_string":"Beijing Innovation Center of Humanoid Robotics Co. Ltd","institution_ids":["https://openalex.org/I4210165198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015422529","display_name":"Jiahang Cao","orcid":"https://orcid.org/0000-0003-4338-4414"},"institutions":[{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahang Cao","raw_affiliation_strings":["Beijing Innovation Center of Humanoid Robotics Co. Ltd"],"affiliations":[{"raw_affiliation_string":"Beijing Innovation Center of Humanoid Robotics Co. Ltd","institution_ids":["https://openalex.org/I4210165198"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101468620","display_name":"Jiaxu Wang","orcid":"https://orcid.org/0000-0003-1277-6896"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiaxu Wang","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou),China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou),China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100572173","display_name":"Yijie Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yijie Guo","raw_affiliation_strings":["Beijing Innovation Center of Humanoid Robotics Co. Ltd"],"affiliations":[{"raw_affiliation_string":"Beijing Innovation Center of Humanoid Robotics Co. Ltd","institution_ids":["https://openalex.org/I4210165198"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109900808","display_name":"Renjing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Renjing Xu","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou),China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou),China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100381869"],"corresponding_institution_ids":["https://openalex.org/I4210165198"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38277124,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2916","last_page":"2922"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9059000015258789,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9059000015258789,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.02329999953508377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.014700000174343586,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7598000168800354},{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.6832000017166138},{"id":"https://openalex.org/keywords/humanoid-robot","display_name":"Humanoid robot","score":0.6782000064849854},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6370999813079834},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.582099974155426},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.5185999870300293},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5157999992370605},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.47850000858306885}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7598000168800354},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.6832000017166138},{"id":"https://openalex.org/C60692881","wikidata":"https://www.wikidata.org/wiki/Q584529","display_name":"Humanoid robot","level":3,"score":0.6782000064849854},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6370999813079834},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6072999835014343},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.582099974155426},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5185999870300293},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5157999992370605},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.491100013256073},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.47850000858306885},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.42399999499320984},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39489999413490295},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.38830000162124634},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.3804999887943268},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.37540000677108765},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.34929999709129333},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3237999975681305},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.29809999465942383},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.29429998993873596},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.2635999917984009}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11245929","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245929","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2141377539","https://openalex.org/W2951360122","https://openalex.org/W2972798201","https://openalex.org/W3038194455","https://openalex.org/W3093922502","https://openalex.org/W3175254947","https://openalex.org/W3176539729","https://openalex.org/W3206762371","https://openalex.org/W4210423514","https://openalex.org/W4285159888","https://openalex.org/W4312238192","https://openalex.org/W4312749288","https://openalex.org/W4383109295","https://openalex.org/W4385430644","https://openalex.org/W4385486316","https://openalex.org/W4385489702","https://openalex.org/W4386075970","https://openalex.org/W4390938336","https://openalex.org/W4392763392","https://openalex.org/W4401415792","https://openalex.org/W4401416668","https://openalex.org/W4401417431","https://openalex.org/W4402350775","https://openalex.org/W4403741606","https://openalex.org/W4405785208"],"related_works":[],"abstract_inverted_index":{"In":[0,57],"recent":[1],"years,":[2],"humanoid":[3,41],"robots":[4],"have":[5],"garnered":[6],"significant":[7],"attention":[8],"from":[9,124,142],"both":[10],"academia":[11],"and":[12,21,54,71,85,120,153,187,235,245],"industry":[13],"due":[14,136],"to":[15,19,93,104,137,185,203,211],"their":[16],"high":[17],"adaptability":[18],"environments":[20,53],"human-like":[22],"characteristics.":[23],"With":[24],"the":[25,37,58,95,105,111,115,138,171,189,193,198,206,218],"rapid":[26],"advancement":[27],"of":[28,40,60,117,140,173,200],"reinforcement":[29,133,201],"learning,":[30],"substantial":[31],"progress":[32],"has":[33],"been":[34],"made":[35],"in":[36,81,151,157,177,213,237,248],"walking":[38],"control":[39],"robots.":[42],"However,":[43,135],"existing":[44,63],"methods":[45,70,75,147],"still":[46],"face":[47,149],"challenges":[48],"when":[49],"dealing":[50],"with":[51],"complex":[52],"irregular":[55],"terrains.":[56],"field":[59],"perceptive":[61,166],"locomotion,":[62],"approaches":[64],"are":[65],"generally":[66],"divided":[67],"into":[68],"two-stage":[69,165,228],"end-to-end":[72,146],"methods.":[73],"Two-stage":[74],"first":[76],"train":[77],"a":[78,82,125,143,178,214],"teacher":[79,144,174],"policy":[80,208],"simulated":[83,238],"environment":[84],"then":[86],"use":[87],"distillation":[88],"techniques,":[89],"such":[90],"as":[91,99],"DAgger,":[92],"transfer":[94],"privileged":[96,118],"information":[97,119],"learned":[98,176],"latent":[100],"features":[101],"or":[102],"actions":[103],"student":[106,190,207],"policy.":[107,191],"End-to-end":[108],"methods,":[109],"on":[110],"other":[112],"hand,":[113],"forgo":[114],"learning":[116,202],"directly":[121],"learn":[122,212],"policies":[123,175],"partially":[126],"observable":[127,180],"Markov":[128,181],"decision":[129,182],"process":[130,183],"(POMDP)":[131],"through":[132],"learning.":[134],"lack":[139],"supervision":[141],"policy,":[145],"often":[148],"difficulties":[150],"training":[152,229,233],"exhibit":[154],"unstable":[155],"performance":[156],"real-world":[158,249],"applications.":[159,250],"This":[160],"paper":[161],"proposes":[162],"an":[163],"innovative":[164],"locomotion":[167],"framework":[168,230],"that":[169,205,226],"combines":[170],"advantages":[172],"fully":[179],"(MDP)":[184],"regularize":[186],"supervise":[188],"At":[192],"same":[194],"time,":[195],"it":[196],"leverages":[197],"characteristics":[199],"ensure":[204],"can":[209],"continue":[210],"POMDP,":[215],"thereby":[216],"enhancing":[217],"model\u2019s":[219],"upper":[220],"bound.":[221],"Our":[222],"experimental":[223],"results":[224],"demonstrate":[225],"our":[227],"achieves":[231],"higher":[232],"efficiency":[234],"stability":[236],"environments,":[239],"while":[240],"also":[241],"exhibiting":[242],"better":[243],"robustness":[244],"generalization":[246],"capabilities":[247]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-28T00:00:00"}
