{"id":"https://openalex.org/W4383109295","doi":"https://doi.org/10.1109/icra48891.2023.10160885","title":"Benchmarking Potential Based Rewards for Learning Humanoid Locomotion","display_name":"Benchmarking Potential Based Rewards for Learning Humanoid Locomotion","publication_year":2023,"publication_date":"2023-05-29","ids":{"openalex":"https://openalex.org/W4383109295","doi":"https://doi.org/10.1109/icra48891.2023.10160885"},"language":"en","primary_location":{"id":"doi:10.1109/icra48891.2023.10160885","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48891.2023.10160885","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2307.10142","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033224913","display_name":"Se Hwan Jeon","orcid":"https://orcid.org/0000-0002-2791-7850"},"institutions":[{"id":"https://openalex.org/I4210134804","display_name":"BioMimetic Systems (United States)","ror":"https://ror.org/0437mk135","country_code":"US","type":"company","lineage":["https://openalex.org/I4210134804"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Se Hwan Jeon","raw_affiliation_strings":["MIT,Biomimetic Robotics Lab","Biomimetic Robotics Lab, MIT"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MIT,Biomimetic Robotics Lab","institution_ids":["https://openalex.org/I4210134804"]},{"raw_affiliation_string":"Biomimetic Robotics Lab, MIT","institution_ids":["https://openalex.org/I4210134804"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084903357","display_name":"Steve Heim","orcid":"https://orcid.org/0000-0002-4916-7464"},"institutions":[{"id":"https://openalex.org/I4210134804","display_name":"BioMimetic Systems (United States)","ror":"https://ror.org/0437mk135","country_code":"US","type":"company","lineage":["https://openalex.org/I4210134804"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steve Heim","raw_affiliation_strings":["MIT,Biomimetic Robotics Lab","Biomimetic Robotics Lab, MIT"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MIT,Biomimetic Robotics Lab","institution_ids":["https://openalex.org/I4210134804"]},{"raw_affiliation_string":"Biomimetic Robotics Lab, MIT","institution_ids":["https://openalex.org/I4210134804"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006671049","display_name":"Charles Khazoom","orcid":"https://orcid.org/0000-0001-7224-1688"},"institutions":[{"id":"https://openalex.org/I4210134804","display_name":"BioMimetic Systems (United States)","ror":"https://ror.org/0437mk135","country_code":"US","type":"company","lineage":["https://openalex.org/I4210134804"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Charles Khazoom","raw_affiliation_strings":["MIT,Biomimetic Robotics Lab","Biomimetic Robotics Lab, MIT"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MIT,Biomimetic Robotics Lab","institution_ids":["https://openalex.org/I4210134804"]},{"raw_affiliation_string":"Biomimetic Robotics Lab, MIT","institution_ids":["https://openalex.org/I4210134804"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101912751","display_name":"Sangbae Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I4210134804","display_name":"BioMimetic Systems (United States)","ror":"https://ror.org/0437mk135","country_code":"US","type":"company","lineage":["https://openalex.org/I4210134804"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sangbae Kim","raw_affiliation_strings":["MIT,Biomimetic Robotics Lab","Biomimetic Robotics Lab, MIT"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MIT,Biomimetic Robotics Lab","institution_ids":["https://openalex.org/I4210134804"]},{"raw_affiliation_string":"Biomimetic Robotics Lab, MIT","institution_ids":["https://openalex.org/I4210134804"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.148,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.86989672,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"9204","last_page":"9210"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9571999907493591,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7863936424255371},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6687610745429993},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5937268137931824},{"id":"https://openalex.org/keywords/humanoid-robot","display_name":"Humanoid robot","score":0.5874354243278503},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.582252562046051},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5542195439338684},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5272699594497681},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5258165597915649},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.43751391768455505},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40522414445877075},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.35470348596572876},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2057192623615265}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7863936424255371},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6687610745429993},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5937268137931824},{"id":"https://openalex.org/C60692881","wikidata":"https://www.wikidata.org/wiki/Q584529","display_name":"Humanoid robot","level":3,"score":0.5874354243278503},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.582252562046051},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5542195439338684},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5272699594497681},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5258165597915649},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.43751391768455505},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40522414445877075},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.35470348596572876},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2057192623615265},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icra48891.2023.10160885","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48891.2023.10160885","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2307.10142","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.10142","pdf_url":"https://arxiv.org/pdf/2307.10142","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:dspace.mit.edu:1721.1/153602","is_oa":true,"landing_page_url":"https://hdl.handle.net/1721.1/153602","pdf_url":"https://dspace.mit.edu/bitstream/1721.1/153602/2/2307.10142.pdf","source":{"id":"https://openalex.org/S4306400425","display_name":"DSpace@MIT (Massachusetts Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I63966007","host_organization_name":"Massachusetts Institute of Technology","host_organization_lineage":["https://openalex.org/I63966007"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arxiv","raw_type":"http://purl.org/eprint/type/ConferencePaper"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2307.10142","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.10142","pdf_url":"https://arxiv.org/pdf/2307.10142","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7533658472","display_name":null,"funder_award_id":"P2SKP2_194954","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"}],"funders":[{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4383109295.pdf","grobid_xml":"https://content.openalex.org/works/W4383109295.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W119236796","https://openalex.org/W1191599655","https://openalex.org/W1499408472","https://openalex.org/W1777239053","https://openalex.org/W2061983716","https://openalex.org/W2107297861","https://openalex.org/W2130750514","https://openalex.org/W2151382427","https://openalex.org/W2202549229","https://openalex.org/W2346505884","https://openalex.org/W2617586537","https://openalex.org/W2736601468","https://openalex.org/W2796290181","https://openalex.org/W2908391464","https://openalex.org/W2963647792","https://openalex.org/W3029641972","https://openalex.org/W3035700320","https://openalex.org/W3039737909","https://openalex.org/W3103379718","https://openalex.org/W3104515094","https://openalex.org/W3105904426","https://openalex.org/W3136437429","https://openalex.org/W3176539729","https://openalex.org/W3206338439","https://openalex.org/W3206363598","https://openalex.org/W3206620955","https://openalex.org/W3206762371","https://openalex.org/W4205430897","https://openalex.org/W4280650054","https://openalex.org/W4286963241","https://openalex.org/W4287818847","https://openalex.org/W4292103799","https://openalex.org/W6604948569","https://openalex.org/W6627932998","https://openalex.org/W6638088447","https://openalex.org/W6682205418","https://openalex.org/W6741002519","https://openalex.org/W6778428183","https://openalex.org/W6779282036","https://openalex.org/W6794016191","https://openalex.org/W6801964084","https://openalex.org/W6841950565"],"related_works":["https://openalex.org/W4362597605","https://openalex.org/W4238897586","https://openalex.org/W1574414179","https://openalex.org/W435179959","https://openalex.org/W2619091065","https://openalex.org/W3009056573","https://openalex.org/W2922073769","https://openalex.org/W4297676672","https://openalex.org/W4281702477","https://openalex.org/W2059640416"],"abstract_inverted_index":{"The":[0],"main":[1],"challenge":[2],"in":[3,40,99,128,137],"developing":[4],"effective":[5],"reinforcement":[6],"learning":[7,65,86],"(RL)":[8],"pipelines":[9],"is":[10],"often":[11],"the":[12,16,35,52,64,69,77,141],"design":[13],"and":[14,38,94,97,156],"tuning":[15],"reward":[17,21,58,82,108,143,153],"functions.":[18],"Well-designed":[19],"shaping":[20,59,83,118,154],"can":[22,32,61],"lead":[23],"to":[24,84,92,149,159],"significantly":[25,146],"faster":[26],"learning.":[27],"Naively":[28],"formulated":[29],"rewards,":[30],"however,":[31],"conflict":[33],"with":[34,119],"desired":[36],"behavior":[37],"result":[39],"overfitting":[41],"or":[42],"even":[43],"erratic":[44],"performance":[45],"if":[46],"not":[47],"properly":[48],"tuned.":[49],"In":[50,110],"theory,":[51],"broad":[53],"class":[54],"of":[55,79,107,117],"potential":[56,80],"based":[57,81],"(PBRS)":[60],"help":[62],"guide":[63],"process":[66],"without":[67],"affecting":[68],"optimal":[70],"policy.":[71],"Although":[72],"several":[73],"studies":[74],"have":[75,89],"explored":[76],"use":[78],"accelerate":[85],"convergence,":[87],"most":[88],"been":[90],"limited":[91],"grid-worlds":[93],"low-dimensional":[95],"systems,":[96],"RL":[98],"robotics":[100],"has":[101,133],"predominantly":[102],"relied":[103],"on":[104],"standard":[105,115],"forms":[106,116],"shaping.":[109],"this":[111,129],"paper,":[112],"we":[113],"benchmark":[114],"PBRS":[120,132,142],"for":[121],"a":[122],"humanoid":[123],"robot.":[124],"We":[125],"find":[126],"that":[127],"high-dimensional":[130],"system,":[131],"only":[134],"marginal":[135],"benefits":[136],"convergence":[138],"speed.":[139],"However,":[140],"terms":[144],"are":[145],"more":[147],"robust":[148],"scaling":[150],"than":[151],"typical":[152],"approaches,":[155],"thus":[157],"easier":[158],"tune.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-13T07:54:00.901334","created_date":"2025-10-10T00:00:00"}
