{"id":"https://openalex.org/W4406271625","doi":"https://doi.org/10.1007/s00521-024-10615-2","title":"Using incomplete and incorrect plans to shape reinforcement learning in long-sequence sparse-reward tasks","display_name":"Using incomplete and incorrect plans to shape reinforcement learning in long-sequence sparse-reward tasks","publication_year":2025,"publication_date":"2025-01-10","ids":{"openalex":"https://openalex.org/W4406271625","doi":"https://doi.org/10.1007/s00521-024-10615-2"},"language":"en","primary_location":{"id":"doi:10.1007/s00521-024-10615-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-024-10615-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-024-10615-2.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s00521-024-10615-2.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102954057","display_name":"Henrik H\u00f8eg M\u00fcller","orcid":"https://orcid.org/0000-0002-5719-4278"},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]},{"id":"https://openalex.org/I4210136150","display_name":"L3S Research Center","ror":"https://ror.org/039t4wk02","country_code":"DE","type":"facility","lineage":["https://openalex.org/I114112103","https://openalex.org/I4210136150","https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Henrik M\u00fcller","raw_affiliation_strings":["L3S Research Center, Leibniz University Hannover, Appelstr. 9a, Hannover, 30167, Lower Saxony, Germany"],"raw_orcid":"https://orcid.org/0000-0002-5719-4278","affiliations":[{"raw_affiliation_string":"L3S Research Center, Leibniz University Hannover, Appelstr. 9a, Hannover, 30167, Lower Saxony, Germany","institution_ids":["https://openalex.org/I4210136150","https://openalex.org/I114112103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114070774","display_name":"L.-E. Berg","orcid":null},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]},{"id":"https://openalex.org/I4210136150","display_name":"L3S Research Center","ror":"https://ror.org/039t4wk02","country_code":"DE","type":"facility","lineage":["https://openalex.org/I114112103","https://openalex.org/I4210136150","https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lukas Berg","raw_affiliation_strings":["L3S Research Center, Leibniz University Hannover, Appelstr. 9a, Hannover, 30167, Lower Saxony, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"L3S Research Center, Leibniz University Hannover, Appelstr. 9a, Hannover, 30167, Lower Saxony, Germany","institution_ids":["https://openalex.org/I4210136150","https://openalex.org/I114112103"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009587907","display_name":"Daniel Kudenko\u22c6","orcid":"https://orcid.org/0000-0003-3359-3255"},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]},{"id":"https://openalex.org/I4210136150","display_name":"L3S Research Center","ror":"https://ror.org/039t4wk02","country_code":"DE","type":"facility","lineage":["https://openalex.org/I114112103","https://openalex.org/I4210136150","https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Daniel Kudenko","raw_affiliation_strings":["L3S Research Center, Leibniz University Hannover, Appelstr. 9a, Hannover, 30167, Lower Saxony, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"L3S Research Center, Leibniz University Hannover, Appelstr. 9a, Hannover, 30167, Lower Saxony, Germany","institution_ids":["https://openalex.org/I4210136150","https://openalex.org/I114112103"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102954057"],"corresponding_institution_ids":["https://openalex.org/I114112103","https://openalex.org/I4210136150"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00242548,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"37","issue":"23","first_page":"18851","last_page":"18866"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9422000050544739,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9405999779701233,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computational-science-and-engineering","display_name":"Computational Science and Engineering","score":0.7787106037139893},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7543882131576538},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6999742984771729},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6341650485992432},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.593114972114563},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5216876864433289},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.44734877347946167},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1436755657196045},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.061614423990249634}],"concepts":[{"id":"https://openalex.org/C68597687","wikidata":"https://www.wikidata.org/wiki/Q362601","display_name":"Computational Science and Engineering","level":2,"score":0.7787106037139893},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7543882131576538},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6999742984771729},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6341650485992432},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.593114972114563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5216876864433289},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44734877347946167},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1436755657196045},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.061614423990249634},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s00521-024-10615-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-024-10615-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-024-10615-2.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s00521-024-10615-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-024-10615-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-024-10615-2.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3613717405","display_name":null,"funder_award_id":"45FGU121_E","funder_id":"https://openalex.org/F4320310476","funder_display_name":"Bundesministerium f\u00fcr Verkehr und Digitale Infrastruktur"},{"id":"https://openalex.org/G367552324","display_name":null,"funder_award_id":"01DD20003","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"}],"funders":[{"id":"https://openalex.org/F4320310476","display_name":"Bundesministerium f\u00fcr Verkehr und Digitale Infrastruktur","ror":"https://ror.org/00e3ns026"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"},{"id":"https://openalex.org/F4320322379","display_name":"Gottfried Wilhelm Leibniz Universit\u00e4t Hannover","ror":"https://ror.org/0304hq317"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4406271625.pdf"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1592847719","https://openalex.org/W2107297861","https://openalex.org/W2111316871","https://openalex.org/W2606126444","https://openalex.org/W2899455150","https://openalex.org/W2902391430","https://openalex.org/W2911293880","https://openalex.org/W2911825241","https://openalex.org/W2912083425","https://openalex.org/W2914782308","https://openalex.org/W2963709735","https://openalex.org/W2964514675","https://openalex.org/W2964654516","https://openalex.org/W3034833075","https://openalex.org/W3037476194","https://openalex.org/W3175128928","https://openalex.org/W3175871729","https://openalex.org/W3203592351","https://openalex.org/W4281727664","https://openalex.org/W4281856560","https://openalex.org/W4285599953","https://openalex.org/W4298427539"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Abstract":[0],"Reinforcement":[1],"learning":[2],"(RL)":[3],"agents":[4,47],"naturally":[5],"struggle":[6],"with":[7,150,239,250],"long-sequence":[8],"sparse-reward":[9],"tasks":[10,50,135,249],"due":[11],"to":[12,30,43,58,109,113,119,132,140,148,165,186,190,206,215,225],"the":[13,21,25,32,54,87,111,141,192,202,216,227,231],"lack":[14],"of":[15,23,77,117,176,234,253],"reward":[16,67,103,209],"feedback":[17],"during":[18],"exploration":[19],"and":[20,95,124,129,168,223,242],"problem":[22],"identifying":[24],"necessary":[26],"action":[27,180],"sequences":[28],"required":[29],"reach":[31,226],"goal.":[33,228],"Previous":[34],"works":[35],"have":[36],"used":[37,205],"abstract":[38,88,126,197],"symbolic":[39,79,89,127,198],"task":[40,55,194],"knowledge":[41,80],"models":[42,90],"speed":[44],"up":[45],"RL":[46,118,217],"in":[48,96,122],"these":[49],"by":[51,62,74,93,183,201],"either":[52],"splitting":[53],"into":[56],"easier":[57],"solve":[59,133],"sub-tasks":[60],"or":[61,171,255],"creating":[63],"an":[64,173],"artificial":[65],"dense":[66,208],"function.":[68],"These":[69],"approaches":[70],"are":[71,91,204],"often":[72],"limited":[73],"their":[75],"requirement":[76],"perfect":[78],"models,":[81],"which":[82,106,211],"cannot":[83],"be":[84,221],"guaranteed":[85],"when":[86],"provided":[92],"humans":[94],"real-world":[97],"tasks.":[98],"We":[99,178,229],"introduce":[100],"exponential":[101],"plan-based":[102],"shaping":[104],",":[105],"is":[107,146],"able":[108,147],"leverage":[110],"ability":[112],"learn":[114],"from":[115],"experience":[116],"compensate":[120],"deficiencies":[121],"incomplete":[123,254],"incorrect":[125,174,256],"plans":[128,151,189,238],"use":[130,179],"them":[131],"difficult":[134],"faster,":[136],"while":[137],"guaranteeing":[138],"convergence":[139],"optimal":[142],"policy.":[143],"Our":[144],"approach":[145,236],"work":[149],"that":[152,162,219],"miss":[153],"important":[154,167,213],"steps,":[155,159],"include":[156],"unnecessary":[157],"extra":[158],"contain":[160],"steps":[161,241],"refer":[163],"ambiguously":[164],"both":[166],"useless":[169],"states,":[170],"encode":[172],"order":[175],"steps.":[177],"representations":[181],"designed":[182],"human":[184],"experts":[185],"automatically":[187],"compute":[188],"capture":[191],"high-level":[193],"structure.":[195],"The":[196],"subgoals":[199],"defined":[200],"plan":[203],"create":[207],"feedback,":[210],"signals":[212],"states":[214],"agent":[218],"should":[220],"achieved":[222],"explored":[224],"show":[230,243],"theoretical":[232],"advantages":[233],"our":[235],"for":[237],"many":[240],"its":[244],"effectiveness":[245],"empirically":[246],"on":[247],"multiple":[248],"different":[251],"kinds":[252],"knowledge.":[257]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
