{"id":"https://openalex.org/W4390111349","doi":"https://doi.org/10.3389/frobt.2023.1280578","title":"Decomposing user-defined tasks in a reinforcement learning setup using TextWorld","display_name":"Decomposing user-defined tasks in a reinforcement learning setup using TextWorld","publication_year":2023,"publication_date":"2023-12-22","ids":{"openalex":"https://openalex.org/W4390111349","doi":"https://doi.org/10.3389/frobt.2023.1280578","pmid":"https://pubmed.ncbi.nlm.nih.gov/38187474"},"language":"en","primary_location":{"id":"doi:10.3389/frobt.2023.1280578","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2023.1280578","pdf_url":"https://www.frontiersin.org/articles/10.3389/frobt.2023.1280578/pdf?isPublishedV2=False","source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Robotics and AI","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.frontiersin.org/articles/10.3389/frobt.2023.1280578/pdf?isPublishedV2=False","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093561939","display_name":"Thanos Petsanis","orcid":"https://orcid.org/0009-0005-2199-2461"},"institutions":[{"id":"https://openalex.org/I147962203","display_name":"Democritus University of Thrace","ror":"https://ror.org/03bfqnx40","country_code":"GR","type":"education","lineage":["https://openalex.org/I147962203"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Thanos Petsanis","raw_affiliation_strings":["School of Engineering, Department of Electrical and Computer Engineering, Democritus University of Thrace (DUTH), Xanthi, Greece"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Department of Electrical and Computer Engineering, Democritus University of Thrace (DUTH), Xanthi, Greece","institution_ids":["https://openalex.org/I147962203"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017025617","display_name":"Christoforos Keroglou","orcid":"https://orcid.org/0000-0002-6461-439X"},"institutions":[{"id":"https://openalex.org/I147962203","display_name":"Democritus University of Thrace","ror":"https://ror.org/03bfqnx40","country_code":"GR","type":"education","lineage":["https://openalex.org/I147962203"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Christoforos Keroglou","raw_affiliation_strings":["School of Engineering, Department of Electrical and Computer Engineering, Democritus University of Thrace (DUTH), Xanthi, Greece"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Department of Electrical and Computer Engineering, Democritus University of Thrace (DUTH), Xanthi, Greece","institution_ids":["https://openalex.org/I147962203"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046634751","display_name":"Athanasios Ch. Kapoutsis","orcid":"https://orcid.org/0000-0002-1688-036X"},"institutions":[{"id":"https://openalex.org/I4210093649","display_name":"Information Technologies Institute","ror":"https://ror.org/0069akp70","country_code":"GR","type":"nonprofit","lineage":["https://openalex.org/I4210093649"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Athanasios Ch. Kapoutsis","raw_affiliation_strings":["The Centre for Research and Technology, Information Technologies Institute, Thessaloniki, Greece"],"affiliations":[{"raw_affiliation_string":"The Centre for Research and Technology, Information Technologies Institute, Thessaloniki, Greece","institution_ids":["https://openalex.org/I4210093649"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071596476","display_name":"Elias B. Kosmatopoulos","orcid":"https://orcid.org/0000-0002-3735-4238"},"institutions":[{"id":"https://openalex.org/I147962203","display_name":"Democritus University of Thrace","ror":"https://ror.org/03bfqnx40","country_code":"GR","type":"education","lineage":["https://openalex.org/I147962203"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Elias B. Kosmatopoulos","raw_affiliation_strings":["School of Engineering, Department of Electrical and Computer Engineering, Democritus University of Thrace (DUTH), Xanthi, Greece"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Department of Electrical and Computer Engineering, Democritus University of Thrace (DUTH), Xanthi, Greece","institution_ids":["https://openalex.org/I147962203"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008924272","display_name":"Georgios Ch. Sirakoulis","orcid":"https://orcid.org/0000-0001-8240-484X"},"institutions":[{"id":"https://openalex.org/I147962203","display_name":"Democritus University of Thrace","ror":"https://ror.org/03bfqnx40","country_code":"GR","type":"education","lineage":["https://openalex.org/I147962203"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Georgios Ch. Sirakoulis","raw_affiliation_strings":["School of Engineering, Department of Electrical and Computer Engineering, Democritus University of Thrace (DUTH), Xanthi, Greece"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Department of Electrical and Computer Engineering, Democritus University of Thrace (DUTH), Xanthi, Greece","institution_ids":["https://openalex.org/I147962203"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5093561939"],"corresponding_institution_ids":["https://openalex.org/I147962203"],"apc_list":{"value":1900,"currency":"USD","value_usd":1900},"apc_paid":{"value":1900,"currency":"USD","value_usd":1900},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18545711,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"10","issue":null,"first_page":"1280578","last_page":"1280578"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8792915344238281},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7618180513381958},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7412072420120239},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.7028147578239441},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.5748390555381775},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.542628288269043},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4519082307815552},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4443351924419403},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4123099148273468},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16332408785820007}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8792915344238281},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7618180513381958},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7412072420120239},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.7028147578239441},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.5748390555381775},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.542628288269043},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4519082307815552},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4443351924419403},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4123099148273468},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16332408785820007},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3389/frobt.2023.1280578","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2023.1280578","pdf_url":"https://www.frontiersin.org/articles/10.3389/frobt.2023.1280578/pdf?isPublishedV2=False","source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Robotics and AI","raw_type":"journal-article"},{"id":"pmid:38187474","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38187474","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in robotics and AI","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:10766815","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/10766815","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC10766815/pdf/frobt-10-1280578.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Front Robot AI","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:816ddadbba88447998404982588c6077","is_oa":true,"landing_page_url":"https://doaj.org/article/816ddadbba88447998404982588c6077","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Frontiers in Robotics and AI, Vol 10 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3389/frobt.2023.1280578","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frobt.2023.1280578","pdf_url":"https://www.frontiersin.org/articles/10.3389/frobt.2023.1280578/pdf?isPublishedV2=False","source":{"id":"https://openalex.org/S2595095599","display_name":"Frontiers in Robotics and AI","issn_l":"2296-9144","issn":["2296-9144"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Robotics and AI","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1337233005","display_name":null,"funder_award_id":"NSRF 2014-2020","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G2881834948","display_name":null,"funder_award_id":"MIS 5047294","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G3875748329","display_name":null,"funder_award_id":"European Regional Development Fund","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G453067482","display_name":null,"funder_award_id":"014-2020","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G5634946813","display_name":null,"funder_award_id":"2014-2020","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G6870533126","display_name":null,"funder_award_id":"2014-2020","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7538097724","display_name":null,"funder_award_id":"NSRF 2014-2020","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G8538941428","display_name":null,"funder_award_id":"Regional Development Fund","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8570012161","display_name":null,"funder_award_id":"unknown","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G8770806815","display_name":null,"funder_award_id":"NSRF 2014-20","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4390111349.pdf"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W1130790960","https://openalex.org/W1488730473","https://openalex.org/W1557517019","https://openalex.org/W1563854815","https://openalex.org/W1583837637","https://openalex.org/W1592847719","https://openalex.org/W1777239053","https://openalex.org/W1977655452","https://openalex.org/W1981627423","https://openalex.org/W2064675550","https://openalex.org/W2153000379","https://openalex.org/W2198041288","https://openalex.org/W2474655341","https://openalex.org/W2567705466","https://openalex.org/W2736601468","https://openalex.org/W2810346659","https://openalex.org/W2938421504","https://openalex.org/W2963575966","https://openalex.org/W2963800628","https://openalex.org/W2964043796","https://openalex.org/W2966477667","https://openalex.org/W3009928773","https://openalex.org/W3034758614","https://openalex.org/W3081567831","https://openalex.org/W3092156990","https://openalex.org/W3126325318","https://openalex.org/W3128894241","https://openalex.org/W3153120990","https://openalex.org/W3158799570","https://openalex.org/W4207072548","https://openalex.org/W4221160486","https://openalex.org/W4225675121","https://openalex.org/W4256361765","https://openalex.org/W4293730687","https://openalex.org/W4360897844","https://openalex.org/W4379795177","https://openalex.org/W4382362889","https://openalex.org/W4385262477","https://openalex.org/W4393147000","https://openalex.org/W6605823007","https://openalex.org/W6629152578","https://openalex.org/W6638088447","https://openalex.org/W6752298494","https://openalex.org/W6840041517"],"related_works":["https://openalex.org/W2341492732","https://openalex.org/W3187193180","https://openalex.org/W106542691","https://openalex.org/W4287027380","https://openalex.org/W1699080303","https://openalex.org/W4297799326","https://openalex.org/W3116064965","https://openalex.org/W3193760048","https://openalex.org/W4285822516","https://openalex.org/W2768698792"],"abstract_inverted_index":{"The":[0],"current":[1],"paper":[2,120],"proposes":[3],"a":[4,12,30,58,70,91,97],"hierarchical":[5],"reinforcement":[6],"learning":[7],"(HRL)":[8],"method":[9],"to":[10,21,89,121],"decompose":[11],"complex":[13],"task":[14],"into":[15],"simpler":[16],"sub-tasks":[17],"and":[18,43,53,86],"leverage":[19],"those":[20],"improve":[22],"the":[23,63,102,110,119],"training":[24],"of":[25,62,73,96,112],"an":[26],"autonomous":[27],"agent":[28],"in":[29],"simulated":[31,60,75],"environment.":[32,76],"For":[33],"practical":[34],"reasons":[35],"(i.e.,":[36],"illustrating":[37],"purposes,":[38],"easy":[39],"implementation,":[40],"user-friendly":[41],"interface,":[42],"useful":[44],"functionalities),":[45],"we":[46,107],"employ":[47],"two":[48],"Python":[49],"frameworks":[50],"called":[51],"TextWorld":[52,67],"MiniGrid.":[54],"MiniGrid":[55],"functions":[56,68],"as":[57,69,106],"2D":[59],"representation":[61],"real":[64],"environment,":[65,104],"while":[66],"high-level":[71],"abstraction":[72,80],"this":[74,79],"Training":[77],"on":[78],"disentangles":[81],"manipulation":[82],"from":[83,129],"navigation":[84],"actions":[85],"allows":[87],"us":[88],"design":[90],"dense":[92],"reward":[93,99],"function":[94,100],"instead":[95],"sparse":[98],"for":[101],"lower-level":[103],"which,":[105],"show,":[108],"improves":[109],"performance":[111],"training.":[113],"Formal":[114],"methods":[115],"are":[116],"utilized":[117],"throughout":[118],"establish":[122],"that":[123],"our":[124],"algorithm":[125],"is":[126],"not":[127],"prevented":[128],"deriving":[130],"solutions.":[131]},"counts_by_year":[],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
