{"id":"https://openalex.org/W3174288549","doi":"https://doi.org/10.1109/access.2021.3090364","title":"Subgoal-Based Reward Shaping to Improve Efficiency in Reinforcement Learning","display_name":"Subgoal-Based Reward Shaping to Improve Efficiency in Reinforcement Learning","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3174288549","doi":"https://doi.org/10.1109/access.2021.3090364","mag":"3174288549"},"language":"en","primary_location":{"id":"doi:10.1109/access.2021.3090364","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3090364","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09459751.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09459751.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057299724","display_name":"Takato Okudo","orcid":"https://orcid.org/0000-0002-7218-7842"},"institutions":[{"id":"https://openalex.org/I200475212","display_name":"The Graduate University for Advanced Studies, SOKENDAI","ror":"https://ror.org/0516ah480","country_code":"JP","type":"education","lineage":["https://openalex.org/I200475212"]},{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takato Okudo","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan","The Graduate University for Advanced Studies, SOKENDAI, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"The Graduate University for Advanced Studies, SOKENDAI, Tokyo, Japan","institution_ids":["https://openalex.org/I200475212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101954161","display_name":"Seiji Yamada","orcid":"https://orcid.org/0000-0002-5907-7382"},"institutions":[{"id":"https://openalex.org/I200475212","display_name":"The Graduate University for Advanced Studies, SOKENDAI","ror":"https://ror.org/0516ah480","country_code":"JP","type":"education","lineage":["https://openalex.org/I200475212"]},{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Seiji Yamada","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan","The Graduate University for Advanced Studies, SOKENDAI, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"The Graduate University for Advanced Studies, SOKENDAI, Tokyo, Japan","institution_ids":["https://openalex.org/I200475212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5057299724"],"corresponding_institution_ids":["https://openalex.org/I184597095","https://openalex.org/I200475212"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.9572,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.88526346,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"9","issue":null,"first_page":"97557","last_page":"97568"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8057423830032349},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7463065981864929},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5975565314292908},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5797470808029175},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5763230919837952},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.49871206283569336},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.4633887708187103},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45824238657951355},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.45637404918670654},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3206615447998047},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0873999297618866}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8057423830032349},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7463065981864929},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5975565314292908},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5797470808029175},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5763230919837952},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.49871206283569336},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.4633887708187103},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45824238657951355},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.45637404918670654},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3206615447998047},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0873999297618866},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2021.3090364","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3090364","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09459751.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:56cef9b009bf465aac2288ea36d47b3e","is_oa":true,"landing_page_url":"https://doaj.org/article/56cef9b009bf465aac2288ea36d47b3e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 9, Pp 97557-97568 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2021.3090364","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3090364","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09459751.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3174288549.pdf","grobid_xml":"https://content.openalex.org/works/W3174288549.grobid-xml"},"referenced_works_count":78,"referenced_works":["https://openalex.org/W1777239053","https://openalex.org/W1931877416","https://openalex.org/W1934818383","https://openalex.org/W2061562262","https://openalex.org/W2073384958","https://openalex.org/W2097381042","https://openalex.org/W2098441518","https://openalex.org/W2106261932","https://openalex.org/W2109910161","https://openalex.org/W2122410182","https://openalex.org/W2130750514","https://openalex.org/W2155027007","https://openalex.org/W2156737235","https://openalex.org/W2156869222","https://openalex.org/W2161009228","https://openalex.org/W2164419340","https://openalex.org/W2202549229","https://openalex.org/W2293184044","https://openalex.org/W2462906003","https://openalex.org/W2562252866","https://openalex.org/W2594829461","https://openalex.org/W2605102581","https://openalex.org/W2623293810","https://openalex.org/W2788455270","https://openalex.org/W2789008106","https://openalex.org/W2792217087","https://openalex.org/W2837981523","https://openalex.org/W2945317616","https://openalex.org/W2949267040","https://openalex.org/W2962715211","https://openalex.org/W2962957031","https://openalex.org/W2963277051","https://openalex.org/W2963864421","https://openalex.org/W2963871073","https://openalex.org/W2964001908","https://openalex.org/W2964227312","https://openalex.org/W2964263543","https://openalex.org/W2970479807","https://openalex.org/W2971266180","https://openalex.org/W2973007837","https://openalex.org/W3103379718","https://openalex.org/W3138984732","https://openalex.org/W3200024664","https://openalex.org/W4211221179","https://openalex.org/W4288029578","https://openalex.org/W4288093623","https://openalex.org/W4293872189","https://openalex.org/W4300198501","https://openalex.org/W4300799055","https://openalex.org/W4321392130","https://openalex.org/W6638088447","https://openalex.org/W6640174482","https://openalex.org/W6640679069","https://openalex.org/W6674600207","https://openalex.org/W6674812091","https://openalex.org/W6676072908","https://openalex.org/W6683195989","https://openalex.org/W6683204974","https://openalex.org/W6684159546","https://openalex.org/W6684921986","https://openalex.org/W6687713970","https://openalex.org/W6696891444","https://openalex.org/W6718092244","https://openalex.org/W6718836005","https://openalex.org/W6727349600","https://openalex.org/W6730885009","https://openalex.org/W6734215269","https://openalex.org/W6736298280","https://openalex.org/W6739585900","https://openalex.org/W6740801417","https://openalex.org/W6747861887","https://openalex.org/W6748599296","https://openalex.org/W6751955673","https://openalex.org/W6762729159","https://openalex.org/W6767317771","https://openalex.org/W6767862010","https://openalex.org/W6780559895","https://openalex.org/W6793999596"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886"],"abstract_inverted_index":{"Reinforcement":[0],"learning,":[1],"which":[2,63],"acquires":[3],"a":[4,53,80,103,142,166,193,211],"policy":[5],"maximizing":[6],"long-term":[7],"rewards,":[8],"has":[9],"been":[10],"actively":[11],"studied.":[12],"Unfortunately,":[13],"this":[14,72],"learning":[15,95,119,196,225],"type":[16],"is":[17,50,97,102,111,125],"too":[18],"slow":[19],"and":[20,140,171,179,183,185,198,205],"difficult":[21,113],"to":[22,58,78,114,154],"use":[23],"in":[24,33,88,174,224],"practical":[25],"situations":[26],"because":[27,74,126],"the":[28,127],"state-action":[29],"space":[30],"becomes":[31],"huge":[32],"real":[34],"environments.":[35],"Many":[36],"studies":[37],"have":[38],"incorporated":[39],"human":[40,46,54,152],"knowledge":[41,47,157],"into":[42],"reinforcement":[43,195],"Learning.":[44],"Though":[45],"on":[48,68,84],"trajectories":[49],"often":[51,112],"used,":[52],"could":[55],"be":[56,65],"asked":[57],"control":[59],"an":[60,85],"AI":[61],"agent,":[62],"can":[64],"difficult.":[66],"Knowledge":[67],"subgoals":[69,116],"may":[70],"lessen":[71],"requirement":[73],"humans":[75],"need":[76],"only":[77],"consider":[79],"few":[81],"representative":[82],"states":[83,178],"optimal":[86],"trajectory":[87],"their":[89,156],"minds.":[90],"The":[91,146],"essential":[92],"factor":[93],"for":[94,106,117,133,151],"efficiency":[96],"rewards.":[98,108],"Potential-based":[99],"reward":[100,122,138,144,208,218],"shaping":[101,139,219],"basic":[104],"method":[105,147,191],"enriching":[107],"However,":[109],"it":[110,149],"incorporate":[115],"accelerating":[118],"over":[120],"potential-based":[121,137],"shaping.":[123,145,209],"This":[124],"appropriate":[128],"potentials":[129],"are":[130],"not":[131],"intuitive":[132],"humans.":[134],"We":[135,188],"extend":[136],"propose":[141],"subgoal-based":[143,200,207],"makes":[148],"easier":[150],"trainers":[153],"share":[155],"of":[158],"subgoals.":[159],"To":[160],"evaluate":[161],"our":[162,190,217],"method,":[163],"we":[164,213],"obtained":[165],"subgoal":[167,204],"series":[168],"from":[169],"participants":[170],"conducted":[172],"experiments":[173],"three":[175],"domains,":[176],"four-rooms(discrete":[177],"discrete":[180],"actions),":[181],"pinball(continuous":[182],"discrete),":[184],"picking(both":[186],"continuous).":[187],"compared":[189],"with":[192],"baseline":[194],"algorithm":[197],"other":[199,222],"methods,":[201],"including":[202],"random":[203],"naive":[206],"As":[210],"result,":[212],"found":[214],"out":[215],"that":[216],"outperformed":[220],"all":[221],"methods":[223],"efficiency.":[226]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":4}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
