{"id":"https://openalex.org/W2577085870","doi":"https://doi.org/10.1017/s0269888915000181","title":"Plan-based reward shaping for multi-agent reinforcement learning","display_name":"Plan-based reward shaping for multi-agent reinforcement learning","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2577085870","doi":"https://doi.org/10.1017/s0269888915000181","mag":"2577085870"},"language":"en","primary_location":{"id":"doi:10.1017/s0269888915000181","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0269888915000181","pdf_url":null,"source":{"id":"https://openalex.org/S137506714","display_name":"The Knowledge Engineering Review","issn_l":"0269-8889","issn":["0269-8889","1469-8005"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Knowledge Engineering Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048451922","display_name":"Sam Devlin","orcid":"https://orcid.org/0000-0002-7769-3090"},"institutions":[{"id":"https://openalex.org/I52099693","display_name":"University of York","ror":"https://ror.org/04m01e293","country_code":"GB","type":"education","lineage":["https://openalex.org/I52099693"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Sam Devlin","raw_affiliation_strings":["Department of Computer Science, University of York, York, YO10 5GH, England e-mail:"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of York, York, YO10 5GH, England e-mail:","institution_ids":["https://openalex.org/I52099693"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009587907","display_name":"Daniel Kudenko\u22c6","orcid":"https://orcid.org/0000-0003-3359-3255"},"institutions":[{"id":"https://openalex.org/I52099693","display_name":"University of York","ror":"https://ror.org/04m01e293","country_code":"GB","type":"education","lineage":["https://openalex.org/I52099693"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Daniel Kudenko","raw_affiliation_strings":["Department of Computer Science, University of York, York, YO10 5GH, England e-mail:"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of York, York, YO10 5GH, England e-mail:","institution_ids":["https://openalex.org/I52099693"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5048451922"],"corresponding_institution_ids":["https://openalex.org/I52099693"],"apc_list":null,"apc_paid":null,"fwci":1.7139,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.8966842,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"31","issue":"1","first_page":"44","last_page":"58"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9569000005722046,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9368000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8879556655883789},{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.6537661552429199},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.6245077848434448},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6174589991569519},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.60475093126297},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5897040963172913},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41787591576576233},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.16885897517204285},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08483177423477173}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8879556655883789},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.6537661552429199},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.6245077848434448},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6174589991569519},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.60475093126297},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5897040963172913},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41787591576576233},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.16885897517204285},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08483177423477173},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s0269888915000181","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0269888915000181","pdf_url":null,"source":{"id":"https://openalex.org/S137506714","display_name":"The Knowledge Engineering Review","issn_l":"0269-8889","issn":["0269-8889","1469-8005"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Knowledge Engineering Review","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7479391602","display_name":null,"funder_award_id":"EP/M023265/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W98142191","https://openalex.org/W119236796","https://openalex.org/W1499408472","https://openalex.org/W1543837481","https://openalex.org/W1553476745","https://openalex.org/W1569296262","https://openalex.org/W1641643976","https://openalex.org/W1777239053","https://openalex.org/W1848006316","https://openalex.org/W1986112535","https://openalex.org/W2095564494","https://openalex.org/W2098432798","https://openalex.org/W2104485423","https://openalex.org/W2104602264","https://openalex.org/W2111316871","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2151382427","https://openalex.org/W2156194062","https://openalex.org/W2158969944","https://openalex.org/W2159600763","https://openalex.org/W2166971042","https://openalex.org/W2330024298","https://openalex.org/W2334782222","https://openalex.org/W2342543862","https://openalex.org/W2406628960","https://openalex.org/W4214717370","https://openalex.org/W4234761190","https://openalex.org/W6636799442"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Abstract":[0],"Recent":[1],"theoretical":[2],"results":[3,74],"have":[4],"justified":[5],"the":[6,17,25,39,62,69,100,115],"use":[7,40,63],"of":[8,19,28,41,64,71,102,109,117],"potential-based":[9],"reward":[10,110],"shaping":[11,111],"as":[12,106],"a":[13,32,48,77,107],"way":[14],"to":[15,30,45],"improve":[16,89],"performance":[18,91],"multi-agent":[20],"reinforcement":[21,53],"learning":[22],"(MARL).":[23],"However,":[24],"question":[26],"remains":[27],"how":[29],"generate":[31,47],"useful":[33],"potential":[34,49,78],"function.":[35],"Previous":[36],"research":[37],"demonstrated":[38],"STRIPS":[42,65],"operator":[43],"knowledge":[44,67,86,105],"automatically":[46],"function":[50,79],"for":[51],"single-agent":[52],"learning.":[54],"Following":[55],"up":[56],"on":[57,81],"this":[58],"work,":[59],"we":[60,98],"investigate":[61,99],"planning":[66],"in":[68,112],"context":[70],"MARL.":[72],"Our":[73],"show":[75],"that":[76],"based":[80],"joint":[82],"or":[83],"individual":[84,103,118],"plan":[85,104],"can":[87],"significantly":[88],"MARL":[90],"compared":[92],"with":[93],"no":[94],"shaping.":[95],"In":[96],"addition,":[97],"limitations":[101],"source":[108],"cases":[113],"where":[114],"combination":[116],"agent":[119],"plans":[120],"causes":[121],"conflict.":[122]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2026-01-08T20:05:33.558190","created_date":"2025-10-10T00:00:00"}
