{"id":"https://openalex.org/W4401413934","doi":"https://doi.org/10.1109/icra57147.2024.10611560","title":"An Efficient Model-Based Approach on Learning Agile Motor Skills without Reinforcement","display_name":"An Efficient Model-Based Approach on Learning Agile Motor Skills without Reinforcement","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401413934","doi":"https://doi.org/10.1109/icra57147.2024.10611560"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10611560","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"conference-paper","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021923513","display_name":"Haojie Shi","orcid":"https://orcid.org/0000-0002-3051-4227"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haojie Shi","raw_affiliation_strings":["Tencent Robotics X,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Robotics X,China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083545249","display_name":"Tingguang Li","orcid":"https://orcid.org/0000-0003-1161-9987"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tingguang Li","raw_affiliation_strings":["Tencent Robotics X,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Robotics X,China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102561539","display_name":"Qingxu Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingxu Zhu","raw_affiliation_strings":["Tencent Robotics X,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Robotics X,China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029261791","display_name":"Jiapeng Sheng","orcid":"https://orcid.org/0000-0001-7436-7997"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiapeng Sheng","raw_affiliation_strings":["Tencent Robotics X,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Robotics X,China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106558017","display_name":"Lei Han","orcid":"https://orcid.org/0009-0005-4507-7698"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Han","raw_affiliation_strings":["Tencent Robotics X,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Robotics X,China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021531143","display_name":"Max Q.\u2010H. Meng","orcid":"https://orcid.org/0000-0002-5255-5898"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Max Q.-H. Meng","raw_affiliation_strings":["Southern University of Science and Technology,Shenzhen Key Laboratory of Robotics Perception and Intelligence,Department of Electronic and Electrical Engineering,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Shenzhen Key Laboratory of Robotics Perception and Intelligence,Department of Electronic and Electrical Engineering,Shenzhen,China","institution_ids":["https://openalex.org/I3045169105"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5724","last_page":"5730"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8065998554229736},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.705388605594635},{"id":"https://openalex.org/keywords/agile-software-development","display_name":"Agile software development","score":0.7001309394836426},{"id":"https://openalex.org/keywords/motor-skill","display_name":"Motor skill","score":0.4493107497692108},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4341728687286377},{"id":"https://openalex.org/keywords/motor-learning","display_name":"Motor learning","score":0.41206037998199463},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3759905993938446},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3450828790664673},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.2125076949596405},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.18495634198188782},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.16485431790351868},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.09405344724655151}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8065998554229736},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.705388605594635},{"id":"https://openalex.org/C14185376","wikidata":"https://www.wikidata.org/wiki/Q30232","display_name":"Agile software development","level":2,"score":0.7001309394836426},{"id":"https://openalex.org/C169976356","wikidata":"https://www.wikidata.org/wiki/Q13208902","display_name":"Motor skill","level":2,"score":0.4493107497692108},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4341728687286377},{"id":"https://openalex.org/C107690735","wikidata":"https://www.wikidata.org/wiki/Q852461","display_name":"Motor learning","level":2,"score":0.41206037998199463},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3759905993938446},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3450828790664673},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2125076949596405},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18495634198188782},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.16485431790351868},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.09405344724655151},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10611560","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.46000000834465027,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W1959608418","https://openalex.org/W2120179083","https://openalex.org/W2529220408","https://openalex.org/W2605102758","https://openalex.org/W2736601468","https://openalex.org/W2744484133","https://openalex.org/W2763052367","https://openalex.org/W2774366155","https://openalex.org/W2785389871","https://openalex.org/W2788030459","https://openalex.org/W2788200022","https://openalex.org/W2796290181","https://openalex.org/W2850053679","https://openalex.org/W2859967432","https://openalex.org/W2909331752","https://openalex.org/W2909553221","https://openalex.org/W2951805468","https://openalex.org/W2963184939","https://openalex.org/W2995298643","https://openalex.org/W3039737909","https://openalex.org/W3093922502","https://openalex.org/W3102715624","https://openalex.org/W3104515094","https://openalex.org/W3104876774","https://openalex.org/W3105372678","https://openalex.org/W3111294788","https://openalex.org/W3122690883","https://openalex.org/W3199990163","https://openalex.org/W3204973825","https://openalex.org/W3207033168","https://openalex.org/W4306177961","https://openalex.org/W4312900898","https://openalex.org/W4315706776","https://openalex.org/W4387595590","https://openalex.org/W4389339407","https://openalex.org/W4389667415","https://openalex.org/W4400362143","https://openalex.org/W6638018090","https://openalex.org/W6640963894","https://openalex.org/W6741002519","https://openalex.org/W6747473740","https://openalex.org/W6748519856","https://openalex.org/W6753183898","https://openalex.org/W6754471908","https://openalex.org/W6771217966","https://openalex.org/W6783047220","https://openalex.org/W6783988234","https://openalex.org/W6793715182","https://openalex.org/W6801964084","https://openalex.org/W6839204832","https://openalex.org/W6845226490","https://openalex.org/W6848588639","https://openalex.org/W6858179378"],"related_works":["https://openalex.org/W4308126387","https://openalex.org/W1970523025","https://openalex.org/W2801972429","https://openalex.org/W2013588380","https://openalex.org/W3185285019","https://openalex.org/W2071080072","https://openalex.org/W2318314916","https://openalex.org/W2368697114","https://openalex.org/W1967161469","https://openalex.org/W2347647442"],"abstract_inverted_index":{"Learning-based":[0],"methods":[1,113],"have":[2],"improved":[3],"locomotion":[4],"skills":[5],"of":[6],"quadruped":[7],"robots":[8],"through":[9],"deep":[10],"reinforcement":[11,111],"learning.":[12],"However,":[13],"the":[14,23,76],"sim-to-real":[15],"gap":[16],"and":[17,56,82,98,133,139],"low":[18],"sample":[19,106],"efficiency":[20,107],"still":[21],"limit":[22],"skill":[24],"transfer.":[25],"To":[26],"address":[27],"this":[28],"issue,":[29],"we":[30],"propose":[31],"an":[32],"efficient":[33],"model-based":[34],"learning":[35,112],"framework":[36],"that":[37],"combines":[38],"a":[39,43,48,62,91,104,128],"world":[40,50],"model":[41,51],"with":[42,126],"policy":[44,66,86,121],"network.":[45],"We":[46,88],"train":[47],"differentiable":[49],"to":[52,59,68,94,110,136],"predict":[53],"future":[54],"states":[55],"use":[57],"it":[58],"directly":[60],"supervise":[61],"Variational":[63],"Autoencoder":[64],"(VAE)-based":[65],"network":[67,93],"imitate":[69],"real":[70,79],"animal":[71],"behaviors.":[72],"This":[73],"significantly":[74],"reduces":[75],"need":[77],"for":[78,84],"interaction":[80],"data":[81,130],"allows":[83],"rapid":[85],"updates.":[87],"also":[89],"develop":[90],"high-level":[92],"track":[95],"diverse":[96],"commands":[97],"trajectories.":[99],"Our":[100],"simulated":[101],"results":[102],"show":[103],"tenfold":[105],"increase":[108],"compared":[109],"such":[114],"as":[115],"PPO.":[116],"In":[117],"real-world":[118],"testing,":[119],"our":[120],"achieves":[122],"proficient":[123],"command-following":[124],"performance":[125],"only":[127],"two-minute":[129],"collection":[131],"period":[132],"generalizes":[134],"well":[135],"new":[137],"speeds":[138],"paths.":[140]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-07-14T23:27:15.235271","created_date":"2025-10-10T00:00:00"}