{"id":"https://openalex.org/W2904165692","doi":"https://doi.org/10.1515/pjbr-2018-0026","title":"Deep reinforcement learning using compositional representations for performing instructions","display_name":"Deep reinforcement learning using compositional representations for performing instructions","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2904165692","doi":"https://doi.org/10.1515/pjbr-2018-0026","mag":"2904165692"},"language":"en","primary_location":{"id":"doi:10.1515/pjbr-2018-0026","is_oa":true,"landing_page_url":"https://doi.org/10.1515/pjbr-2018-0026","pdf_url":"https://www.degruyter.com/document/doi/10.1515/pjbr-2018-0026/pdf","source":{"id":"https://openalex.org/S4210226531","display_name":"Paladyn Journal of Behavioral Robotics","issn_l":"2081-4836","issn":["2081-4836"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320322","host_organization_name":"De Gruyter Open","host_organization_lineage":["https://openalex.org/P4310320322","https://openalex.org/P4310313990"],"host_organization_lineage_names":["De Gruyter Open","De Gruyter"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Paladyn, Journal of Behavioral Robotics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.degruyter.com/document/doi/10.1515/pjbr-2018-0026/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101942305","display_name":"Mohammad Ali Zamani","orcid":"https://orcid.org/0000-0001-5350-645X"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Mohammad Ali Zamani","raw_affiliation_strings":["Knowledge Technology, Department of Informatics, University of Hamburg, Vogt-Koelln-Str. 30, Hamburg , Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Knowledge Technology, Department of Informatics, University of Hamburg, Vogt-Koelln-Str. 30, Hamburg , Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015567592","display_name":"Sven Magg","orcid":"https://orcid.org/0000-0002-0589-6585"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sven Magg","raw_affiliation_strings":["Knowledge Technology, Department of Informatics, University of Hamburg, Vogt- Koelln-Str. 30, Hamburg , Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Knowledge Technology, Department of Informatics, University of Hamburg, Vogt- Koelln-Str. 30, Hamburg , Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102025003","display_name":"Cornelius Weber","orcid":"https://orcid.org/0000-0001-5163-938X"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Cornelius Weber","raw_affiliation_strings":["Knowledge Technology, Department of Informatics, University of Hamburg, Vogt- Koelln-Str. 30, Hamburg , Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Knowledge Technology, Department of Informatics, University of Hamburg, Vogt- Koelln-Str. 30, Hamburg , Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033486668","display_name":"Stefan Wermter","orcid":"https://orcid.org/0000-0003-1343-4775"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Wermter","raw_affiliation_strings":["Knowledge Technology, Department of Informatics, University of Hamburg, Vogt- Koelln-Str. 30, Hamburg , Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Knowledge Technology, Department of Informatics, University of Hamburg, Vogt- Koelln-Str. 30, Hamburg , Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103207941","display_name":"Di Fu","orcid":"https://orcid.org/0000-0002-5385-2982"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["CN","DE"],"is_corresponding":false,"raw_author_name":"Di Fu","raw_affiliation_strings":["CAS Key Laboratory of Behavioral Science, Chinese Academy of Sciences, Beijing , China","Department of Psychology, University of Chinese Academy of Sciences, Beijing , China","Knowledge Technology, Department of Informatics, University of Hamburg, Vogt-Koelln-Str. 30, Hamburg , Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CAS Key Laboratory of Behavioral Science, Chinese Academy of Sciences, Beijing , China","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"Department of Psychology, University of Chinese Academy of Sciences, Beijing , China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Knowledge Technology, Department of Informatics, University of Hamburg, Vogt-Koelln-Str. 30, Hamburg , Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101942305"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"],"apc_list":{"value":1000,"currency":"EUR","value_usd":1078},"apc_paid":{"value":1000,"currency":"EUR","value_usd":1078},"fwci":0.1692,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62399605,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"9","issue":"1","first_page":"358","last_page":"373"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.86585533618927},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8431606292724609},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.663282036781311},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6483269929885864},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.6096035838127136},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6013713479042053},{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.5499224662780762},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4918243885040283},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.46740394830703735},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4265492558479309},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.42605340480804443},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3581949770450592},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12774759531021118}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.86585533618927},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8431606292724609},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.663282036781311},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6483269929885864},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.6096035838127136},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6013713479042053},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.5499224662780762},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4918243885040283},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.46740394830703735},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4265492558479309},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.42605340480804443},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3581949770450592},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12774759531021118},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1515/pjbr-2018-0026","is_oa":true,"landing_page_url":"https://doi.org/10.1515/pjbr-2018-0026","pdf_url":"https://www.degruyter.com/document/doi/10.1515/pjbr-2018-0026/pdf","source":{"id":"https://openalex.org/S4210226531","display_name":"Paladyn Journal of Behavioral Robotics","issn_l":"2081-4836","issn":["2081-4836"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320322","host_organization_name":"De Gruyter Open","host_organization_lineage":["https://openalex.org/P4310320322","https://openalex.org/P4310313990"],"host_organization_lineage_names":["De Gruyter Open","De Gruyter"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Paladyn, Journal of Behavioral Robotics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:5a5754a5178344bfac592fe4b31d50c3","is_oa":true,"landing_page_url":"https://doaj.org/article/5a5754a5178344bfac592fe4b31d50c3","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Paladyn, Vol 9, Iss 1, Pp 358-373 (2018)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1515/pjbr-2018-0026","is_oa":true,"landing_page_url":"https://doi.org/10.1515/pjbr-2018-0026","pdf_url":"https://www.degruyter.com/document/doi/10.1515/pjbr-2018-0026/pdf","source":{"id":"https://openalex.org/S4210226531","display_name":"Paladyn Journal of Behavioral Robotics","issn_l":"2081-4836","issn":["2081-4836"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320322","host_organization_name":"De Gruyter Open","host_organization_lineage":["https://openalex.org/P4310320322","https://openalex.org/P4310313990"],"host_organization_lineage_names":["De Gruyter Open","De Gruyter"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Paladyn, Journal of Behavioral Robotics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5899999737739563,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2096171154","display_name":"Safety Enables Cooperation in Uncertain Robotic Environments","funder_award_id":"642667","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G6276684567","display_name":null,"funder_award_id":"TRR 169","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2904165692.pdf","grobid_xml":"https://content.openalex.org/works/W2904165692.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W1872023060","https://openalex.org/W1934909785","https://openalex.org/W1973674690","https://openalex.org/W1976207411","https://openalex.org/W2044259325","https://openalex.org/W2064675550","https://openalex.org/W2121110499","https://openalex.org/W2133230405","https://openalex.org/W2139283211","https://openalex.org/W2145339207","https://openalex.org/W2168921921","https://openalex.org/W2229480318","https://openalex.org/W2250539671","https://openalex.org/W2251648989","https://openalex.org/W2608968617","https://openalex.org/W2612581922","https://openalex.org/W2730274928","https://openalex.org/W2746553466","https://openalex.org/W2919115771"],"related_works":["https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2076061571","https://openalex.org/W2106552856","https://openalex.org/W1987513656","https://openalex.org/W2072376847","https://openalex.org/W2089013912"],"abstract_inverted_index":{"Abstract":[0],"Spoken":[1],"language":[2],"is":[3,75,112],"one":[4],"of":[5,19,58,66,78,86,106,131,141],"the":[6,17,20,40,64,67,76,104,109,129,134,147,159,169,177],"most":[7],"efficientways":[8],"to":[9,23,26,38,46,95,114,180],"instruct":[10],"robots":[11],"about":[12],"performing":[13],"domestic":[14],"tasks.":[15],"However,":[16],"state":[18,65,142],"environment":[21],"has":[22],"be":[24,96,172],"considered":[25],"plan":[27],"and":[28,43,84,120],"execute":[29],"actions":[30,59,132],"successfully.":[31],"We":[32,144],"propose":[33],"a":[34,47,56,87,99,122,139],"system":[35,53,148],"that":[36,146,168],"learns":[37],"recognise":[39,151],"user\u2019s":[41],"intention":[42,117],"map":[44],"it":[45,94],"goal.":[48],"A":[49,69],"reinforcement":[50],"learning":[51],"(RL)":[52],"then":[54],"generates":[55,128],"sequence":[57],"toward":[60],"this":[61,73],"goal":[62],"considering":[63],"environment.":[68],"novel":[70],"contribution":[71],"in":[72,98,121],"paper":[74],"use":[77],"symbolic":[79,164,178],"representations":[80],"for":[81],"both":[82],"input":[83],"output":[85],"neural":[88],"Deep":[89],"Q-network":[90],"(DQN),":[91],"which":[92],"enables":[93],"used":[97,113],"hybrid":[100],"system.":[101],"To":[102],"show":[103,145,167],"effectiveness":[105],"our":[107],"approach,":[108],"Tell-Me-Dave":[110],"corpus":[111,155],"train":[115,158],"an":[116,125],"detection":[118],"model":[119],"second":[123],"step":[124],"RL":[126,161],"agent":[127],"sequences":[130,153],"towards":[133],"detected":[135],"objective,":[136],"represented":[137],"by":[138,175],"set":[140],"predicates.":[143],"can":[149,171],"successfully":[150],"command":[152],"fromthis":[154],"aswell":[156],"as":[157],"deep-":[160],"network":[162],"with":[163],"input.We":[165],"further":[166],"performance":[170],"significantly":[173],"increased":[174],"exploiting":[176],"representation":[179],"generate":[181],"intermediate":[182],"rewards.":[183]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
