{"id":"https://openalex.org/W7131109130","doi":"https://doi.org/10.1109/robio66223.2025.11378044","title":"Learning to Switch Gait Randomly and Continuously of Bipedal Robots via Stage-Wise Reward Shaping*","display_name":"Learning to Switch Gait Randomly and Continuously of Bipedal Robots via Stage-Wise Reward Shaping*","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W7131109130","doi":"https://doi.org/10.1109/robio66223.2025.11378044"},"language":null,"primary_location":{"id":"doi:10.1109/robio66223.2025.11378044","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robio66223.2025.11378044","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126630775","display_name":"Chiyao Li","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chiyao Li","raw_affiliation_strings":["School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005171833","display_name":"Shilong Sun","orcid":"https://orcid.org/0000-0003-0460-4592"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shilong Sun","raw_affiliation_strings":["School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004901154","display_name":"Haodong Huang","orcid":"https://orcid.org/0000-0002-3173-0861"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haodong Huang","raw_affiliation_strings":["School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100417704","display_name":"Yuanpeng Wang","orcid":"https://orcid.org/0000-0001-8989-0510"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanpeng Wang","raw_affiliation_strings":["School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103865745","display_name":"Wenfu Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenfu Xu","raw_affiliation_strings":["School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"School of Robotics and Advanced Manufacturing, Harbin Institute of Technology,Shenzhen,China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5126630775"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.63388525,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1964","last_page":"1970"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.7638000249862671,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.7638000249862671,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.033399999141693115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.027699999511241913,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/jumping","display_name":"Jumping","score":0.7556999921798706},{"id":"https://openalex.org/keywords/traverse","display_name":"Traverse","score":0.755299985408783},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.7330999970436096},{"id":"https://openalex.org/keywords/terrain","display_name":"Terrain","score":0.5864999890327454},{"id":"https://openalex.org/keywords/gait","display_name":"Gait","score":0.5472000241279602},{"id":"https://openalex.org/keywords/robot-locomotion","display_name":"Robot locomotion","score":0.5156000256538391},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.4666000008583069},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.4553000032901764}],"concepts":[{"id":"https://openalex.org/C2779897013","wikidata":"https://www.wikidata.org/wiki/Q1151752","display_name":"Jumping","level":2,"score":0.7556999921798706},{"id":"https://openalex.org/C176809094","wikidata":"https://www.wikidata.org/wiki/Q15401496","display_name":"Traverse","level":2,"score":0.755299985408783},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.7330999970436096},{"id":"https://openalex.org/C161840515","wikidata":"https://www.wikidata.org/wiki/Q186131","display_name":"Terrain","level":2,"score":0.5864999890327454},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5771999955177307},{"id":"https://openalex.org/C151800584","wikidata":"https://www.wikidata.org/wiki/Q2370000","display_name":"Gait","level":2,"score":0.5472000241279602},{"id":"https://openalex.org/C6101204","wikidata":"https://www.wikidata.org/wiki/Q7353391","display_name":"Robot locomotion","level":5,"score":0.5156000256538391},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.4666000008583069},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.4553000032901764},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.4526999890804291},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.4357999861240387},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.4311000108718872},{"id":"https://openalex.org/C24027999","wikidata":"https://www.wikidata.org/wiki/Q2176348","display_name":"Omnidirectional antenna","level":3,"score":0.414900004863739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36959999799728394},{"id":"https://openalex.org/C2780226923","wikidata":"https://www.wikidata.org/wiki/Q929848","display_name":"Movement (music)","level":2,"score":0.3686999976634979},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.35030001401901245},{"id":"https://openalex.org/C145565327","wikidata":"https://www.wikidata.org/wiki/Q852514","display_name":"Motion control","level":3,"score":0.3368000090122223},{"id":"https://openalex.org/C2776937971","wikidata":"https://www.wikidata.org/wiki/Q4384217","display_name":"Heading (navigation)","level":2,"score":0.3116999864578247},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C192921069","wikidata":"https://www.wikidata.org/wiki/Q550134","display_name":"Inverted pendulum","level":3,"score":0.29260000586509705},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2896000146865845},{"id":"https://openalex.org/C65401140","wikidata":"https://www.wikidata.org/wiki/Q7353385","display_name":"Robot control","level":4,"score":0.28540000319480896},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2786000072956085},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.2644999921321869}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/robio66223.2025.11378044","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robio66223.2025.11378044","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2911087563","https://openalex.org/W2963184939","https://openalex.org/W2968774764","https://openalex.org/W3039737909","https://openalex.org/W3093922502","https://openalex.org/W3111294788","https://openalex.org/W3202243720","https://openalex.org/W3206762371","https://openalex.org/W4205430897","https://openalex.org/W4226143977","https://openalex.org/W4296718913","https://openalex.org/W4318953217","https://openalex.org/W4383097434","https://openalex.org/W4383108274","https://openalex.org/W4383108368","https://openalex.org/W4383108958","https://openalex.org/W4383109295","https://openalex.org/W4386038305","https://openalex.org/W4386822465","https://openalex.org/W4390494915","https://openalex.org/W4392763392","https://openalex.org/W4394872716","https://openalex.org/W4396910086","https://openalex.org/W4401415792","https://openalex.org/W4402401628","https://openalex.org/W4412923251"],"related_works":[],"abstract_inverted_index":{"Bipedal":[0],"robots":[1],"capable":[2],"of":[3,12],"continuously":[4],"and":[5,34,46,50,123,142,146],"randomly":[6,57],"switching":[7,31,158],"gaits":[8,144],"acrossvarious":[9],"terrains":[10,156],"are":[11,41],"great":[13],"significance.":[14],"This":[15],"paper":[16],"proposes":[17],"a":[18,25,78,104],"stage-wise":[19],"reward":[20,39,80],"shaping":[21],"method":[22,133],"that":[23,130],"enables":[24,134],"single":[26],"policy":[27],"to":[28,92,116,138,153],"perform":[29],"random":[30],"between":[32,140],"jumping":[33,45,65,141],"walking":[35,47,84,143],"locomotion":[36,54,98],"modes.":[37,160],"Dedicated":[38],"functions":[40],"designed":[42],"for":[43],"the":[44,53,63,83,94,113,118,131,135],"locomotion,":[48],"respectively,":[49],"during":[51],"training,":[52],"mode":[55],"is":[56,90,110],"switched":[58],"at":[59,99],"fixed":[60],"intervals.":[61],"For":[62],"complex":[64,155],"task,":[66],"we":[67],"further":[68],"divide":[69],"it":[70,152],"into":[71,112],"four":[72],"consecutive":[73],"sub-stages,":[74],"each":[75],"shaped":[76],"with":[77],"different":[79],"function.":[81],"In":[82,102],"mode,":[85],"omnidirectional":[86,97],"velocity":[87],"command":[88],"tracking":[89],"used":[91],"train":[93],"robot,":[95],"enabling":[96],"various":[100],"speeds.":[101],"addition,":[103],"long":[105],"short-term":[106],"memory":[107],"(LSTM)":[108],"network":[109],"integrated":[111],"Actor-Critic":[114],"framework":[115],"help":[117],"model":[119],"utilize":[120],"historical":[121],"information":[122],"make":[124],"better":[125],"decisions.":[126],"Simulation":[127],"results":[128],"demonstrate":[129],"proposed":[132],"bipedal":[136],"robot":[137],"switch":[139],"smoothly":[145],"naturally":[147],"in":[148],"arbitrary":[149],"order,":[150],"allowing":[151],"traverse":[154],"by":[157],"gait":[159]},"counts_by_year":[],"updated_date":"2026-02-25T06:17:34.324206","created_date":"2026-02-24T00:00:00"}
