{"id":"https://openalex.org/W2002305926","doi":"https://doi.org/10.1109/ijcnn.2014.6889732","title":"Multi-objectivization of reinforcement learning problems by reward shaping","display_name":"Multi-objectivization of reinforcement learning problems by reward shaping","publication_year":2014,"publication_date":"2014-07-01","ids":{"openalex":"https://openalex.org/W2002305926","doi":"https://doi.org/10.1109/ijcnn.2014.6889732","mag":"2002305926"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2014.6889732","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2014.6889732","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084002137","display_name":"Tim Brys","orcid":null},"institutions":[{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Tim Brys","raw_affiliation_strings":["A1 Lab at the Vrije Universiteit, Brussel, Belgium","AI Lab. at the Vrije Univ. Brussel, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"A1 Lab at the Vrije Universiteit, Brussel, Belgium","institution_ids":["https://openalex.org/I13469542"]},{"raw_affiliation_string":"AI Lab. at the Vrije Univ. Brussel, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014267939","display_name":"Anna Harutyunyan","orcid":"https://orcid.org/0000-0002-5418-113X"},"institutions":[{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Anna Harutyunyan","raw_affiliation_strings":["AI Lab at the Vrije Universiteit Br\u00fcssel, Belgium","AI Lab. at the Vrije Univ. Brussel, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"AI Lab at the Vrije Universiteit Br\u00fcssel, Belgium","institution_ids":["https://openalex.org/I13469542"]},{"raw_affiliation_string":"AI Lab. at the Vrije Univ. Brussel, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062896285","display_name":"Peter Vrancx","orcid":"https://orcid.org/0000-0002-9876-3684"},"institutions":[{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Peter Vrancx","raw_affiliation_strings":["A1 Lab at the Vrije Universiteit, Brussel, Belgium","AI Lab. at the Vrije Univ. Brussel, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"A1 Lab at the Vrije Universiteit, Brussel, Belgium","institution_ids":["https://openalex.org/I13469542"]},{"raw_affiliation_string":"AI Lab. at the Vrije Univ. Brussel, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070914351","display_name":"Matthew E. Taylor","orcid":"https://orcid.org/0000-0001-8946-0211"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew E. Taylor","raw_affiliation_strings":["Washington State University, Pullman, WA, US","Washington State University, Pullman,WA USA"],"affiliations":[{"raw_affiliation_string":"Washington State University, Pullman, WA, US","institution_ids":["https://openalex.org/I72951846"]},{"raw_affiliation_string":"Washington State University, Pullman,WA USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009587907","display_name":"Daniel Kudenko\u22c6","orcid":"https://orcid.org/0000-0003-3359-3255"},"institutions":[{"id":"https://openalex.org/I192455969","display_name":"York University","ror":"https://ror.org/05fq50484","country_code":"CA","type":"education","lineage":["https://openalex.org/I192455969"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Daniel Kudenko","raw_affiliation_strings":["Reinforcement Learning Group at York University","[Reinforcement Learning Group at York University]"],"affiliations":[{"raw_affiliation_string":"Reinforcement Learning Group at York University","institution_ids":["https://openalex.org/I192455969"]},{"raw_affiliation_string":"[Reinforcement Learning Group at York University]","institution_ids":["https://openalex.org/I192455969"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064553018","display_name":"Ann Now\u00e9","orcid":"https://orcid.org/0000-0001-6346-4564"},"institutions":[{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Ann Nowe","raw_affiliation_strings":["A1 Lab at the Vrije Universiteit, Brussel, Belgium","AI Lab. at the Vrije Univ. Brussel, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"A1 Lab at the Vrije Universiteit, Brussel, Belgium","institution_ids":["https://openalex.org/I13469542"]},{"raw_affiliation_string":"AI Lab. at the Vrije Univ. Brussel, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5084002137"],"corresponding_institution_ids":["https://openalex.org/I13469542"],"apc_list":null,"apc_paid":null,"fwci":10.5504,"has_fulltext":false,"cited_by_count":60,"citation_normalized_percentile":{"value":0.98265006,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2315","last_page":"2322"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.897350549697876},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6525442004203796},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.6377260088920593},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.5765599012374878},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.5035259127616882},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.49172094464302063},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48130905628204346},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.47817403078079224},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.46504369378089905},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.44812577962875366},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4286395311355591},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.41282787919044495},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3880890905857086},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3108956217765808},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.28161144256591797},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16461870074272156},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.12691140174865723},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10833743214607239}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.897350549697876},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6525442004203796},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.6377260088920593},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5765599012374878},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.5035259127616882},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.49172094464302063},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48130905628204346},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.47817403078079224},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.46504369378089905},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.44812577962875366},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4286395311355591},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.41282787919044495},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3880890905857086},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3108956217765808},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.28161144256591797},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16461870074272156},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.12691140174865723},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10833743214607239},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/ijcnn.2014.6889732","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2014.6889732","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:vubissmart:VUBISSMART:2000:224379","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306402573","display_name":"VUBIR (Vrije Universiteit Brussel)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I13469542","host_organization_name":"Vrije Universiteit Brussel","host_organization_lineage":["https://openalex.org/I13469542"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:vubissmart:VUBISSMART:2000:87055","is_oa":false,"landing_page_url":"https://biblio.vub.ac.be/vubir/multiobjectivization-of-reinforcement-learning-problems-by-reward-shaping(ba68d604-db97-471a-8746-aa008f794aaf).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306402573","display_name":"VUBIR (Vrije Universiteit Brussel)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I13469542","host_organization_name":"Vrije Universiteit Brussel","host_organization_lineage":["https://openalex.org/I13469542"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W173958421","https://openalex.org/W1499408472","https://openalex.org/W1533644244","https://openalex.org/W1539701879","https://openalex.org/W1585603966","https://openalex.org/W1585711251","https://openalex.org/W1777239053","https://openalex.org/W1833338034","https://openalex.org/W1965681731","https://openalex.org/W1968933161","https://openalex.org/W2012612381","https://openalex.org/W2043806097","https://openalex.org/W2050149892","https://openalex.org/W2050439249","https://openalex.org/W2058192020","https://openalex.org/W2060846151","https://openalex.org/W2073297125","https://openalex.org/W2081944499","https://openalex.org/W2095564494","https://openalex.org/W2102660061","https://openalex.org/W2113496026","https://openalex.org/W2113913482","https://openalex.org/W2115668428","https://openalex.org/W2117428849","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2147750403","https://openalex.org/W2155872498","https://openalex.org/W2157263597","https://openalex.org/W2198041288","https://openalex.org/W2201042428","https://openalex.org/W2404716917","https://openalex.org/W3011120880","https://openalex.org/W3103262232","https://openalex.org/W4214717370","https://openalex.org/W4233696721","https://openalex.org/W4285719527","https://openalex.org/W6638088447","https://openalex.org/W6683110158","https://openalex.org/W6687764432","https://openalex.org/W6713551735"],"related_works":["https://openalex.org/W4400868993","https://openalex.org/W3096874164","https://openalex.org/W1985560493","https://openalex.org/W2937181779","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1626977535","https://openalex.org/W2341346307","https://openalex.org/W3168977894","https://openalex.org/W2386410636"],"abstract_inverted_index":{"Multi-objectivization":[0],"is":[1,79,101,174],"the":[2,21,29,34,42,51,63,70,93,110,127,132,151,162,170,182,196],"process":[3],"of":[4,23,53,65,72,165,172,184],"transforming":[5],"a":[6,11,59,80,192],"single":[7,153],"objective":[8,31,154],"problem":[9,36,155,194],"into":[10],"multi-objective":[12],"problem.":[13,45],"Research":[14],"in":[15,92,139,176],"evolutionary":[16],"optimization":[17],"has":[18],"demonstrated":[19],"that":[20,25,142,161],"addition":[22],"objectives":[24],"are":[26,122],"correlated":[27,125,145],"with":[28,126],"original":[30,43],"can":[32,147],"make":[33],"resulting":[35,97],"easier":[37],"to":[38,41,82,103,112,149],"solve":[39,150],"compared":[40],"single-objective":[44],"In":[46],"this":[47,140,177,185],"paper":[48,141],"we":[49,137],"investigate":[50],"multi-objectivization":[52,64],"reinforcement":[54,85,189],"learning":[55,86,190],"problems.":[56],"We":[57,159],"propose":[58],"novel":[60],"method":[61],"for":[62,131],"Markov":[66],"Decision":[67],"problems":[68],"through":[69],"use":[71],"multiple":[73],"reward":[74,94,99,119,134],"shaping":[75,78,120],"functions.":[76],"Reward":[77],"technique":[81],"speed":[83],"up":[84],"by":[87,123,168],"including":[88],"additional":[89],"heuristic":[90],"knowledge":[91],"signal.":[95],"The":[96],"composite":[98],"signal":[100],"expected":[102],"be":[104],"more":[105,116],"informative":[106],"during":[107],"learning,":[108],"leading":[109],"learner":[111],"identify":[113],"good":[114],"actions":[115],"quickly.":[117],"Good":[118],"functions":[121],"definition":[124],"target":[128],"value":[129],"function":[130],"base":[133],"signal,":[135],"and":[136,157,167,179,195],"show":[138],"adding":[143],"several":[144],"signals":[146],"help":[148],"basic":[152],"faster":[156],"better.":[158],"prove":[160],"total":[163],"ordering":[164],"solutions,":[166,173],"consequence":[169],"optimality":[171],"preserved":[175],"process,":[178],"empirically":[180],"demonstrate":[181],"usefulness":[183],"approach":[186],"on":[187],"two":[188],"tasks:":[191],"pathfinding":[193],"Mario":[197],"domain.":[198]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
