{"id":"https://openalex.org/W3203207428","doi":"https://doi.org/10.1007/s00521-021-06259-1","title":"Policy invariant explicit shaping: an efficient alternative to reward shaping","display_name":"Policy invariant explicit shaping: an efficient alternative to reward shaping","publication_year":2021,"publication_date":"2021-09-28","ids":{"openalex":"https://openalex.org/W3203207428","doi":"https://doi.org/10.1007/s00521-021-06259-1","mag":"3203207428"},"language":"en","primary_location":{"id":"doi:10.1007/s00521-021-06259-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-021-06259-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-021-06259-1.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s00521-021-06259-1.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049923588","display_name":"Paniz Behboudian","orcid":null},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Paniz Behboudian","raw_affiliation_strings":["Alberta Machine Intelligence Institute, Edmonton, Alberta, Canada","Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada"],"affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute, Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012858965","display_name":"Yash Satsangi","orcid":"https://orcid.org/0000-0002-6726-4065"},"institutions":[{"id":"https://openalex.org/I193700539","display_name":"Tilburg University","ror":"https://ror.org/04b8v1s79","country_code":"NL","type":"education","lineage":["https://openalex.org/I193700539"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Yash Satsangi","raw_affiliation_strings":["Department of Cognitive Science and Artificial Intelligence, Tilburg University, Tilburg, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Department of Cognitive Science and Artificial Intelligence, Tilburg University, Tilburg, The Netherlands","institution_ids":["https://openalex.org/I193700539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070914351","display_name":"Matthew E. Taylor","orcid":"https://orcid.org/0000-0001-8946-0211"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Matthew E. Taylor","raw_affiliation_strings":["Alberta Machine Intelligence Institute, Edmonton, Alberta, Canada","Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada"],"affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute, Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014267939","display_name":"Anna Harutyunyan","orcid":"https://orcid.org/0000-0002-5418-113X"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Anna Harutyunyan","raw_affiliation_strings":["DeepMind, London, England"],"affiliations":[{"raw_affiliation_string":"DeepMind, London, England","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081163135","display_name":"Michael Bowling","orcid":"https://orcid.org/0000-0003-2960-8418"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Michael Bowling","raw_affiliation_strings":["Alberta Machine Intelligence Institute, Edmonton, Alberta, Canada","DeepMind, Edmonton, Alberta, Canada","Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada"],"affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute, Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"DeepMind, Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5049923588"],"corresponding_institution_ids":["https://openalex.org/I154425047"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.5592,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.73688772,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"34","issue":"3","first_page":"1673","last_page":"1686"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.979200005531311,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.810316264629364},{"id":"https://openalex.org/keywords/advice","display_name":"Advice (programming)","score":0.7501373887062073},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6675611138343811},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.6370962858200073},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5473366975784302},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4359322488307953},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.42742565274238586},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4109658896923065},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1937704086303711}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.810316264629364},{"id":"https://openalex.org/C2779955035","wikidata":"https://www.wikidata.org/wiki/Q4686785","display_name":"Advice (programming)","level":2,"score":0.7501373887062073},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6675611138343811},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.6370962858200073},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5473366975784302},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4359322488307953},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.42742565274238586},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4109658896923065},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1937704086303711},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s00521-021-06259-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-021-06259-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-021-06259-1.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s00521-021-06259-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-021-06259-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-021-06259-1.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G2165548363","display_name":null,"funder_award_id":"Canada","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G3033334534","display_name":null,"funder_award_id":"AI Chair","funder_id":"https://openalex.org/F4320309949","funder_display_name":"Canadian Institute for Advanced Research"},{"id":"https://openalex.org/G5784215521","display_name":null,"funder_award_id":"Chair","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"}],"funders":[{"id":"https://openalex.org/F4320309949","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95"},{"id":"https://openalex.org/F4320314000","display_name":"Compute Canada","ror":"https://ror.org/03ty8yr27"},{"id":"https://openalex.org/F4320314212","display_name":"Alberta Machine Intelligence Institute","ror":null},{"id":"https://openalex.org/F4320319946","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27"},{"id":"https://openalex.org/F4320321487","display_name":"Canadian Network for Research and Innovation in Machining Technology, Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3203207428.pdf","grobid_xml":"https://content.openalex.org/works/W3203207428.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W46130386","https://openalex.org/W142858861","https://openalex.org/W1499408472","https://openalex.org/W1502099479","https://openalex.org/W1557517019","https://openalex.org/W1600437712","https://openalex.org/W1777239053","https://openalex.org/W1902261929","https://openalex.org/W2002305926","https://openalex.org/W2110064869","https://openalex.org/W2116157560","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2130750514","https://openalex.org/W2150339816","https://openalex.org/W2151382427","https://openalex.org/W2156869222","https://openalex.org/W2158969944","https://openalex.org/W2161009228","https://openalex.org/W2164419340","https://openalex.org/W2165664253","https://openalex.org/W2202549229","https://openalex.org/W2294422333","https://openalex.org/W2625456521","https://openalex.org/W2788455270","https://openalex.org/W2982316857","https://openalex.org/W3103379718","https://openalex.org/W4234438384","https://openalex.org/W4298023569"],"related_works":["https://openalex.org/W2145363145","https://openalex.org/W2341346307","https://openalex.org/W2154399718","https://openalex.org/W4321463377","https://openalex.org/W1914583973","https://openalex.org/W2130711276","https://openalex.org/W4308828368","https://openalex.org/W3088331655","https://openalex.org/W1504584981","https://openalex.org/W3038962357"],"abstract_inverted_index":{"Abstract":[0],"Reinforcement":[1],"learning":[2,7,31,171,185],"(RL)":[3],"is":[4,49,90,99,113],"a":[5,93,119,135,221],"powerful":[6],"paradigm":[8],"in":[9,28,191],"which":[10],"agents":[11],"can":[12,32,65,80,183,237],"learn":[13,54,81],"to":[14,50,53,69,101,127,130,157,206],"maximize":[15],"sparse":[16],"and":[17,140,178,230,233,243],"delayed":[18],"reward":[19,76,89,107],"signals.":[20],"Although":[21],"RL":[22,47],"has":[23,60],"had":[24],"many":[25],"impressive":[26],"successes":[27],"complex":[29],"domains,":[30],"take":[33,131],"hours,":[34],"days,":[35],"or":[36,137],"even":[37],"years":[38],"of":[39,45,169],"training":[40],"data.":[41,57,85],"A":[42],"major":[43],"challenge":[44,160],"contemporary":[46],"research":[48],"discover":[51],"how":[52],"with":[55,82,186,215],"less":[56,84],"Previous":[58],"work":[59],"shown":[61],"that":[62,200,235],"domain":[63],"information":[64],"be":[66,102,128],"successfully":[67],"used":[68],"shape":[70],"the":[71,78,88,96,116,145,163,170,194,203,245],"reward;":[72],"by":[73,115,161],"adding":[74,202],"additional":[75],"information,":[77],"agent":[79,139],"much":[83],"Furthermore,":[86],"if":[87],"constructed":[91],"from":[92,134],"potential":[94,121,164],"function,":[95],"optimal":[97,146,195,246],"policy":[98,225,247],"guaranteed":[100],"unaltered.":[103],"While":[104],"such":[105],"potential-based":[106,152],"shaping":[108,214,228],"(PBRS)":[109],"holds":[110],"promise,":[111],"it":[112,189,209],"limited":[114],"need":[117],"for":[118],"well-defined":[120],"function.":[122],"Ideally,":[123],"we":[124],"would":[125],"like":[126],"able":[129],"arbitrary":[132,239],"advice":[133,153],"human":[136],"other":[138],"improve":[141],"performance":[142],"without":[143],"affecting":[144],"policy.":[147,196],"The":[148],"recently":[149],"introduced":[150],"dynamic":[151],"(DPBA)":[154],"was":[155],"proposed":[156],"tackle":[158],"this":[159,174],"predicting":[162],"function":[165],"values":[166],"as":[167],"part":[168],"process.":[172],"However,":[173],"article":[175],"demonstrates":[176],"theoretically":[177,232],"empirically":[179,234],"that,":[180],"while":[181],"DPBA":[182,208],"facilitate":[184],"good":[187,216],"advice,":[188,240],"does":[190],"fact":[192],"alter":[193],"We":[197,218],"further":[198],"show":[199,231],"when":[201],"correction":[204],"term":[205],"\u201cfix\u201d":[207],"no":[210],"longer":[211],"shows":[212],"effective":[213],"advice.":[217],"then":[219],"present":[220],"simple":[222],"method":[223],"called":[224],"invariant":[226],"explicit":[227],"(PIES)":[229],"PIES":[236],"use":[238],"speed-up":[241],"learning,":[242],"leave":[244],"unchanged.":[248]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}
