{"id":"https://openalex.org/W4401417352","doi":"https://doi.org/10.1109/icra57147.2024.10610705","title":"Human-Robot Gym: Benchmarking Reinforcement Learning in Human-Robot Collaboration","display_name":"Human-Robot Gym: Benchmarking Reinforcement Learning in Human-Robot Collaboration","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401417352","doi":"https://doi.org/10.1109/icra57147.2024.10610705"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610705","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610705","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037578754","display_name":"Jakob Thumm","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jakob Thumm","raw_affiliation_strings":["Technical University of Munich,Department of Computer Engineering,Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich,Department of Computer Engineering,Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013011993","display_name":"Felix Trost","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Felix Trost","raw_affiliation_strings":["Technical University of Munich,Department of Computer Engineering,Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich,Department of Computer Engineering,Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005383495","display_name":"Matthias Althoff","orcid":"https://orcid.org/0000-0003-3733-842X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Matthias Althoff","raw_affiliation_strings":["Technical University of Munich,Department of Computer Engineering,Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich,Department of Computer Engineering,Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5037578754"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":2.9504,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.92071462,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"7405","last_page":"7411"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.87123703956604},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7290582656860352},{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.6662309169769287},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6574012041091919},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6120707988739014},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot learning","score":0.5936373472213745},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.5773946046829224},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.445144385099411},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.33313894271850586},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.07815980911254883}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.87123703956604},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7290582656860352},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.6662309169769287},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6574012041091919},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6120707988739014},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.5936373472213745},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5773946046829224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.445144385099411},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.33313894271850586},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.07815980911254883},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610705","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610705","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W2017957151","https://openalex.org/W2158782408","https://openalex.org/W2296332048","https://openalex.org/W2739274579","https://openalex.org/W2752162720","https://openalex.org/W2791359174","https://openalex.org/W2793935950","https://openalex.org/W2796290181","https://openalex.org/W2906487027","https://openalex.org/W2950430092","https://openalex.org/W3012211643","https://openalex.org/W3088304681","https://openalex.org/W3089482831","https://openalex.org/W3091243689","https://openalex.org/W3104515094","https://openalex.org/W3122928565","https://openalex.org/W3171725195","https://openalex.org/W3206820790","https://openalex.org/W3206921000","https://openalex.org/W3214937729","https://openalex.org/W3216772467","https://openalex.org/W4281252704","https://openalex.org/W4285102237","https://openalex.org/W4307234290","https://openalex.org/W4312790574","https://openalex.org/W4367016688","https://openalex.org/W4378697332","https://openalex.org/W4385430679","https://openalex.org/W4389665566","https://openalex.org/W4401417374","https://openalex.org/W6740801417","https://openalex.org/W6747473740","https://openalex.org/W6769596995","https://openalex.org/W6782766965","https://openalex.org/W6785883622","https://openalex.org/W6799853460","https://openalex.org/W6804601995","https://openalex.org/W6810450869","https://openalex.org/W7006391265"],"related_works":["https://openalex.org/W2930863966","https://openalex.org/W2126211886","https://openalex.org/W3153786280","https://openalex.org/W3127551068","https://openalex.org/W2165180011","https://openalex.org/W3165944253","https://openalex.org/W4220829754","https://openalex.org/W2351343564","https://openalex.org/W2170052867","https://openalex.org/W3119422923"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"has":[4],"shown":[5],"promising":[6],"results":[7],"in":[8,15,26,50,58,129],"robot":[9],"motion":[10],"planning":[11],"with":[12],"first":[13,69],"attempts":[14],"human-robot":[16,42,65,114],"collaboration":[17],"(HRC).":[18],"However,":[19],"a":[20,44,59,74,84,117],"fair":[21],"comparison":[22],"of":[23,31,97,109,131],"RL":[24,49,89,122,137],"approaches":[25],"HRC":[27,56],"under":[28],"the":[29,68,106,110,136,141],"constraint":[30],"guaranteed":[32],"safety":[33,75],"is":[34,67],"yet":[35],"to":[36,77,101,149],"be":[37],"made.":[38],"We,":[39],"therefore,":[40],"present":[41],"gym,":[43],"benchmark":[45,70,119],"suite":[46,71],"for":[47,120],"safe":[48],"HRC.":[51],"We":[52],"provide":[53],"challenging,":[54],"realistic":[55],"tasks":[57,99,111],"modular":[60],"simulation":[61],"framework.":[62],"Most":[63],"importantly,":[64],"gym":[66,115],"that":[72],"includes":[73],"shield":[76],"provably":[78],"guarantee":[79],"human":[80],"safety.":[81],"This":[82],"bridges":[83],"critical":[85],"gap":[86],"between":[87],"theoretic":[88],"research":[90],"and":[91,143],"its":[92],"real-world":[93],"deployment.":[94],"Our":[95],"evaluation":[96],"six":[98],"led":[100],"three":[102],"key":[103],"results:":[104],"(a)":[105],"diverse":[107],"nature":[108],"offered":[112],"by":[113,125],"creates":[116],"challenging":[118],"state-of-the-art":[121],"methods,":[123],"(b)":[124],"leveraging":[126],"expert":[127],"knowledge":[128],"form":[130],"an":[132],"action":[133],"imitation":[134],"reward,":[135],"agent":[138],"can":[139],"outperform":[140],"expert,":[142],"(c)":[144],"our":[145],"agents":[146],"negligibly":[147],"overfit":[148],"training":[150],"data.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-04T09:10:02.777135","created_date":"2025-10-10T00:00:00"}
