{"id":"https://openalex.org/W4312823397","doi":"https://doi.org/10.1109/icdl53763.2022.9962207","title":"Grounding Hindsight Instructions in Multi-Goal Reinforcement Learning for Robotics","display_name":"Grounding Hindsight Instructions in Multi-Goal Reinforcement Learning for Robotics","publication_year":2022,"publication_date":"2022-09-12","ids":{"openalex":"https://openalex.org/W4312823397","doi":"https://doi.org/10.1109/icdl53763.2022.9962207"},"language":"en","primary_location":{"id":"doi:10.1109/icdl53763.2022.9962207","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdl53763.2022.9962207","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Development and Learning (ICDL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018588611","display_name":"Frank Roder","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Frank Roder","raw_affiliation_strings":["Institute for Data Science Foundations Hamburg University of Technology,Hamburg,Germany","Institute for Data Science Foundations Hamburg University of Technology, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Data Science Foundations Hamburg University of Technology,Hamburg,Germany","institution_ids":["https://openalex.org/I4210136595","https://openalex.org/I159176309","https://openalex.org/I884043246"]},{"raw_affiliation_string":"Institute for Data Science Foundations Hamburg University of Technology, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050108731","display_name":"Manfred Eppe","orcid":"https://orcid.org/0000-0002-5473-3221"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Manfred Eppe","raw_affiliation_strings":["Institute for Data Science Foundations Hamburg University of Technology,Hamburg,Germany","Institute for Data Science Foundations Hamburg University of Technology, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Data Science Foundations Hamburg University of Technology,Hamburg,Germany","institution_ids":["https://openalex.org/I4210136595","https://openalex.org/I159176309","https://openalex.org/I884043246"]},{"raw_affiliation_string":"Institute for Data Science Foundations Hamburg University of Technology, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033486668","display_name":"Stefan Wermter","orcid":"https://orcid.org/0000-0003-1343-4775"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Wermter","raw_affiliation_strings":["University of Hamburg,Knowledge Technology,Hamburg,Germany","Knowledge Technology, University of Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"University of Hamburg,Knowledge Technology,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]},{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5018588611"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I4210136595","https://openalex.org/I884043246"],"apc_list":null,"apc_paid":null,"fwci":0.7957,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.77304901,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"170","last_page":"177"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9664999842643738,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hindsight-bias","display_name":"Hindsight bias","score":0.9695160388946533},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8006023168563843},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7550249099731445},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6566545963287354},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6343475580215454},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.5550861954689026},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5438398122787476},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4837200939655304},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45793771743774414},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3627518117427826},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.23368534445762634},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11671039462089539}],"concepts":[{"id":"https://openalex.org/C10347200","wikidata":"https://www.wikidata.org/wiki/Q1960297","display_name":"Hindsight bias","level":2,"score":0.9695160388946533},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8006023168563843},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7550249099731445},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6566545963287354},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6343475580215454},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.5550861954689026},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5438398122787476},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4837200939655304},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45793771743774414},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3627518117427826},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.23368534445762634},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11671039462089539},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdl53763.2022.9962207","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdl53763.2022.9962207","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Development and Learning (ICDL)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.800000011920929}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3197854638","https://openalex.org/W3140454661","https://openalex.org/W4245029315","https://openalex.org/W2139970489","https://openalex.org/W1492315459","https://openalex.org/W1512434910","https://openalex.org/W2540910169","https://openalex.org/W3089780453","https://openalex.org/W3148904318","https://openalex.org/W3012552522"],"abstract_inverted_index":{"This":[0],"paper":[1],"focuses":[2],"on":[3],"robotic":[4],"reinforcement":[5],"learning":[6,78,87,103,133],"with":[7,44,161],"sparse":[8],"rewards":[9],"for":[10,52,148],"natural":[11,27,145],"language":[12,34,146],"goal":[13,147],"representations.":[14],"An":[15],"open":[16],"problem":[17],"is":[18],"the":[19,24,31,86,102,118,157,162],"sample-inefficiency":[20],"that":[21,82,101,138,156],"stems":[22],"from":[23,30],"compositionality":[25],"of":[26,33,76,105],"language,":[28],"and":[29,38],"grounding":[32],"in":[35,115,125],"sensory":[36],"data":[37],"actions.":[39],"We":[40,47,80,99,129],"address":[41],"these":[42],"issues":[43],"three":[45],"contributions.":[46],"first":[48],"present":[49,72],"a":[50,62,73,116,126,144],"mechanism":[51],"hindsight":[53,68,83],"instruction":[54],"replay":[55],"utilizing":[56],"expert":[57],"feedback.":[58],"Second,":[59],"we":[60,71,93],"propose":[61],"seq2seq":[63],"model":[64],"to":[65,121,123,134],"generate":[66,135],"linguistic":[67,136],"instructions.":[69],"Finally,":[70],"novel":[74],"class":[75],"language-focused":[77],"tasks.":[79],"show":[81,100],"instructions":[84,137],"improve":[85],"performance,":[88],"as":[89,143],"expected.":[90],"In":[91],"addition,":[92],"also":[94],"provide":[95],"an":[96,149],"unexpected":[97],"result:":[98],"performance":[104,158],"our":[106],"agent":[107,119],"can":[108],"be":[109],"improved":[110],"by":[111,132],"one":[112],"third":[113],"if,":[114],"sense,":[117],"learns":[120],"talk":[122],"itself":[124],"self-supervised":[127],"manner.":[128],"achieve":[130],"this":[131],"would":[139],"have":[140],"been":[141],"appropriate":[142],"originally":[150],"unintended":[151],"behavior.":[152],"Our":[153],"results":[154],"indicate":[155],"gain":[159],"increases":[160],"task-complexity.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
