{"id":"https://openalex.org/W4389667262","doi":"https://doi.org/10.1109/iros55552.2023.10342479","title":"RMBench: Benchmarking Deep Reinforcement Learning for Robotic Manipulator Control","display_name":"RMBench: Benchmarking Deep Reinforcement Learning for Robotic Manipulator Control","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4389667262","doi":"https://doi.org/10.1109/iros55552.2023.10342479"},"language":"en","primary_location":{"id":"doi:10.1109/iros55552.2023.10342479","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342479","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071160327","display_name":"Yanfei Xiang","orcid":"https://orcid.org/0000-0002-5755-4114"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanfei Xiang","raw_affiliation_strings":["Ministry of Education, Key Laboratory for Earth System Modeling, Institute for Global Change Studies, Tsinghua University,Department of Earth System Science,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Ministry of Education, Key Laboratory for Earth System Modeling, Institute for Global Change Studies, Tsinghua University,Department of Earth System Science,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100328088","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0003-1947-9325"},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]},{"id":"https://openalex.org/I392282","display_name":"University at Albany, State University of New York","ror":"https://ror.org/012zs8222","country_code":"US","type":"education","lineage":["https://openalex.org/I392282"]},{"id":"https://openalex.org/I113508548","display_name":"Albany State University","ror":"https://ror.org/01vme4277","country_code":"US","type":"education","lineage":["https://openalex.org/I113508548"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":["University at Albany, State University of New York,USA","University at Albany, State University of New York, USA","University at Buffalo, State University of New York, USA"],"affiliations":[{"raw_affiliation_string":"University at Albany, State University of New York,USA","institution_ids":["https://openalex.org/I113508548"]},{"raw_affiliation_string":"University at Albany, State University of New York, USA","institution_ids":["https://openalex.org/I113508548","https://openalex.org/I392282"]},{"raw_affiliation_string":"University at Buffalo, State University of New York, USA","institution_ids":["https://openalex.org/I63190737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100687829","display_name":"Shu Hu","orcid":"https://orcid.org/0000-0003-1446-4140"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shu Hu","raw_affiliation_strings":["Carnegie Mellon University,USA","Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100619754","display_name":"Bin Zhu","orcid":"https://orcid.org/0000-0002-5478-1426"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Zhu","raw_affiliation_strings":["Microsoft Research Asia,China","Microsoft Research Asia, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia,China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research Asia, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052865881","display_name":"Xiaomeng Huang","orcid":"https://orcid.org/0000-0002-4158-1089"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaomeng Huang","raw_affiliation_strings":["Ministry of Education, Key Laboratory for Earth System Modeling, Institute for Global Change Studies, Tsinghua University,Department of Earth System Science,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Ministry of Education, Key Laboratory for Earth System Modeling, Institute for Global Change Studies, Tsinghua University,Department of Earth System Science,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100626231","display_name":"Xi Wu","orcid":"https://orcid.org/0000-0002-8405-9530"},"institutions":[{"id":"https://openalex.org/I24201400","display_name":"Chengdu University of Information Technology","ror":"https://ror.org/01yxwrh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I24201400"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xi Wu","raw_affiliation_strings":["Chengdu University of Information Technology,China","Chengdu University of Information Technology, China"],"affiliations":[{"raw_affiliation_string":"Chengdu University of Information Technology,China","institution_ids":["https://openalex.org/I24201400"]},{"raw_affiliation_string":"Chengdu University of Information Technology, China","institution_ids":["https://openalex.org/I24201400"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023752172","display_name":"Siwei Lyu","orcid":"https://orcid.org/0000-0002-0992-685X"},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siwei Lyu","raw_affiliation_strings":["University at Buffalo, State University of New York,USA","University at Buffalo, State University of New York, USA"],"affiliations":[{"raw_affiliation_string":"University at Buffalo, State University of New York,USA","institution_ids":["https://openalex.org/I63190737"]},{"raw_affiliation_string":"University at Buffalo, State University of New York, USA","institution_ids":["https://openalex.org/I63190737"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5071160327"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.0548,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.82223006,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1207","last_page":"1214"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9860000014305115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8738882541656494},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8121888637542725},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7899052500724792},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.771034836769104},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6711597442626953},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.6517848372459412},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5930684804916382},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.45291751623153687},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4454788863658905},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.08954933285713196}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8738882541656494},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8121888637542725},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7899052500724792},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.771034836769104},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6711597442626953},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.6517848372459412},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5930684804916382},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.45291751623153687},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4454788863658905},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.08954933285713196},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros55552.2023.10342479","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342479","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5820790610","display_name":null,"funder_award_id":"42125503,42075137","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W834081922","https://openalex.org/W1191599655","https://openalex.org/W1570792896","https://openalex.org/W1583837637","https://openalex.org/W1771410628","https://openalex.org/W2076337359","https://openalex.org/W2098774185","https://openalex.org/W2109169869","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2151210636","https://openalex.org/W2155027007","https://openalex.org/W2158782408","https://openalex.org/W2168438882","https://openalex.org/W2342662072","https://openalex.org/W2575705757","https://openalex.org/W2736601468","https://openalex.org/W2768233943","https://openalex.org/W2781585732","https://openalex.org/W2781726626","https://openalex.org/W2787938642","https://openalex.org/W2798705390","https://openalex.org/W2930863966","https://openalex.org/W2949561945","https://openalex.org/W2949600457","https://openalex.org/W2950527027","https://openalex.org/W2951762469","https://openalex.org/W2962759351","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2964227158","https://openalex.org/W2964262254","https://openalex.org/W3025606523","https://openalex.org/W3036619998","https://openalex.org/W3036670859","https://openalex.org/W3082349339","https://openalex.org/W3088304681","https://openalex.org/W3095027108","https://openalex.org/W3110059731","https://openalex.org/W3115293622","https://openalex.org/W3122690883","https://openalex.org/W3132674603","https://openalex.org/W3175558129","https://openalex.org/W3186672455","https://openalex.org/W4226160878","https://openalex.org/W4250979948","https://openalex.org/W4287662662","https://openalex.org/W4288102901","https://openalex.org/W4288320194","https://openalex.org/W4293872189","https://openalex.org/W4302570325","https://openalex.org/W6623316541","https://openalex.org/W6627932998","https://openalex.org/W6638018090","https://openalex.org/W6674884181","https://openalex.org/W6680657880","https://openalex.org/W6682314434","https://openalex.org/W6683204974","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6687086880","https://openalex.org/W6692846177","https://openalex.org/W6704571135","https://openalex.org/W6732386917","https://openalex.org/W6736469832","https://openalex.org/W6741002519","https://openalex.org/W6745873365","https://openalex.org/W6748599296","https://openalex.org/W6748839928","https://openalex.org/W6750645735","https://openalex.org/W6754782557","https://openalex.org/W6765242767","https://openalex.org/W6767997047","https://openalex.org/W6775634482","https://openalex.org/W6777091672","https://openalex.org/W6780189587","https://openalex.org/W6780404908","https://openalex.org/W6782760920","https://openalex.org/W6782766965","https://openalex.org/W6783852185","https://openalex.org/W6783988234","https://openalex.org/W6791000347"],"related_works":["https://openalex.org/W4238897586","https://openalex.org/W435179959","https://openalex.org/W2619091065","https://openalex.org/W2059640416","https://openalex.org/W1490753184","https://openalex.org/W2284465472","https://openalex.org/W2291782699","https://openalex.org/W1993948687","https://openalex.org/W2000169967","https://openalex.org/W4399363378"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,21,32,99,113,158],"is":[2,162],"used":[3],"to":[4,67,72,115],"tackle":[5],"complex":[6],"tasks":[7,134,170],"with":[8,26,87,136,153],"high-dimensional":[9,88],"sensory":[10,35],"inputs.":[11],"Over":[12],"the":[13,81,128],"past":[14],"decade,":[15],"a":[16,40,69],"wide":[17],"range":[18],"of":[19,127,144],"reinforcement":[20,98],"algorithms":[22,47,100,130,141],"have":[23],"been":[24],"developed,":[25],"recent":[27],"progress":[28],"benefiting":[29],"from":[30],"deep":[31],"for":[33,84],"raw":[34],"signal":[36],"representation.":[37],"This":[38],"raises":[39],"natural":[41],"question:":[42],"how":[43],"well":[44],"do":[45],"these":[46],"perform":[48],"across":[49],"different":[50],"robotic":[51,85],"manipulation":[52],"tasks?":[53],"To":[54],"objectively":[55],"compare":[56,73],"algorithms,":[57],"benchmarks":[58],"use":[59,63],"performance":[60,65,111,118],"metrics.":[61],"Benchmarks":[62],"objective":[64],"metrics":[66],"offer":[68],"scientific":[70],"way":[71],"algorithms.":[74,173],"In":[75],"this":[76],"paper,":[77],"we":[78],"introduce":[79],"RMBench,":[80],"first":[82],"benchmark":[83,169],"manipulations":[86],"continuous":[89],"action":[90],"and":[91,96,107,112,119,147,149,171],"state":[92],"spaces.":[93],"We":[94],"implement":[95],"evaluate":[97],"that":[101,125],"take":[102],"observed":[103],"pixels":[104],"as":[105],"inputs":[106],"report":[108],"their":[109,117],"average":[110,145],"curves":[114],"demonstrate":[116],"training":[120],"stability.":[121],"Our":[122,160],"study":[123],"concludes":[124],"none":[126],"evaluated":[129],"can":[131],"handle":[132],"all":[133,168],"well,":[135],"soft":[137],"Actor-Critic":[138],"outperforming":[139],"most":[140],"in":[142],"terms":[143],"reward":[146],"stability,":[148],"an":[150],"algorithm":[151],"combined":[152],"data":[154],"augmentation":[155],"potentially":[156],"facilitating":[157],"policies.":[159],"code":[161],"publicly":[163],"available":[164],"at":[165],"https://github.com/xiangyanfei212/RMBench-2022.git,":[166],"including":[167],"studied":[172]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
