{"id":"https://openalex.org/W2035737909","doi":"https://doi.org/10.1177/1059712308092835","title":"Co-evolution of Shaping Rewards and Meta-Parameters in Reinforcement Learning","display_name":"Co-evolution of Shaping Rewards and Meta-Parameters in Reinforcement Learning","publication_year":2008,"publication_date":"2008-11-13","ids":{"openalex":"https://openalex.org/W2035737909","doi":"https://doi.org/10.1177/1059712308092835","mag":"2035737909"},"language":"en","primary_location":{"id":"doi:10.1177/1059712308092835","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1059712308092835","pdf_url":null,"source":{"id":"https://openalex.org/S183337005","display_name":"Adaptive Behavior","issn_l":"1059-7123","issn":["1059-7123","1741-2633"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Adaptive Behavior","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068305732","display_name":"Stefan Elfwing","orcid":"https://orcid.org/0000-0001-6689-1000"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]},{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["JP","SE"],"is_corresponding":true,"raw_author_name":"Stefan Elfwing","raw_affiliation_strings":["Centre for Autonomous Systems, Numerical Analysis and Computer Science, KTH, Sweden, Neural Computation Unit, Okinawa Institute of Science and Technology, Japan,","Centre for Autonomous Systems, Numerical Analysis andComputer Science, KTH, Sweden, Neural Computation Unit, Okinawa Institute of Scienceand Technology, Japan#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Autonomous Systems, Numerical Analysis and Computer Science, KTH, Sweden, Neural Computation Unit, Okinawa Institute of Science and Technology, Japan,","institution_ids":["https://openalex.org/I142637625"]},{"raw_affiliation_string":"Centre for Autonomous Systems, Numerical Analysis andComputer Science, KTH, Sweden, Neural Computation Unit, Okinawa Institute of Scienceand Technology, Japan#TAB#","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031054137","display_name":"Eiji Uchibe","orcid":"https://orcid.org/0000-0001-7908-0258"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Eiji Uchibe","raw_affiliation_strings":["Neural Computation Unit, Okinawa Institute of Science and Technology, Japan,","Neural Computation Unit, Okinawa Institute of Scienceand Technology, Japan#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Neural Computation Unit, Okinawa Institute of Science and Technology, Japan,","institution_ids":["https://openalex.org/I142637625"]},{"raw_affiliation_string":"Neural Computation Unit, Okinawa Institute of Scienceand Technology, Japan#TAB#","institution_ids":["https://openalex.org/I142637625"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004840638","display_name":"Kenji Doya","orcid":"https://orcid.org/0000-0002-2446-6820"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kenji Doya","raw_affiliation_strings":["Neural Computation Unit, Okinawa Institute of Science and Technology, Japan,","Neural Computation Unit, Okinawa Institute of Scienceand Technology, Japan#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Neural Computation Unit, Okinawa Institute of Science and Technology, Japan,","institution_ids":["https://openalex.org/I142637625"]},{"raw_affiliation_string":"Neural Computation Unit, Okinawa Institute of Scienceand Technology, Japan#TAB#","institution_ids":["https://openalex.org/I142637625"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066237365","display_name":"Henrik I. Christensen","orcid":"https://orcid.org/0000-0002-7465-7502"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Henrik I. Christensen","raw_affiliation_strings":["Centre for Autonomous Systems, Numerical Analysis and Computer Science, KTH, Sweden,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Autonomous Systems, Numerical Analysis and Computer Science, KTH, Sweden,","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5004840638","https://openalex.org/A5031054137","https://openalex.org/A5066237365","https://openalex.org/A5068305732"],"corresponding_institution_ids":["https://openalex.org/I142637625","https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":4.6994,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.94444678,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"16","issue":"6","first_page":"400","last_page":"412"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11663","display_name":"Viral Infectious Diseases and Gene Expression in Insects","score":0.9646999835968018,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8396166563034058},{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.7349474430084229},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.6475400924682617},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6240816712379456},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta learning (computer science)","score":0.6176168918609619},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.593821108341217},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5192406177520752},{"id":"https://openalex.org/keywords/foraging","display_name":"Foraging","score":0.5011296272277832},{"id":"https://openalex.org/keywords/animal-learning","display_name":"Animal learning","score":0.47331345081329346},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.47273480892181396},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.44064953923225403},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4178374111652374},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3793431520462036},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.28875261545181274},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1706736981868744},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1663724184036255},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12148928642272949},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.10622605681419373},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.10154110193252563}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8396166563034058},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.7349474430084229},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.6475400924682617},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6240816712379456},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.6176168918609619},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.593821108341217},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5192406177520752},{"id":"https://openalex.org/C165287380","wikidata":"https://www.wikidata.org/wiki/Q2916569","display_name":"Foraging","level":2,"score":0.5011296272277832},{"id":"https://openalex.org/C2992566924","wikidata":"https://www.wikidata.org/wiki/Q499210","display_name":"Animal learning","level":2,"score":0.47331345081329346},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.47273480892181396},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.44064953923225403},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4178374111652374},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3793431520462036},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.28875261545181274},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1706736981868744},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1663724184036255},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12148928642272949},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.10622605681419373},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.10154110193252563},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1177/1059712308092835","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1059712308092835","pdf_url":null,"source":{"id":"https://openalex.org/S183337005","display_name":"Adaptive Behavior","issn_l":"1059-7123","issn":["1059-7123","1741-2633"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Adaptive Behavior","raw_type":"journal-article"},{"id":"pmh:oai:smartech.gatech.edu:1853/38251","is_oa":false,"landing_page_url":"http://repository.gatech.edu/bitstreams/a09b02ee-bbcf-42c5-9665-edf215fb8bdc/download","pdf_url":null,"source":{"id":"https://openalex.org/S4377196313","display_name":"SMARTech Repository (Georgia Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I130701444","host_organization_name":"Georgia Institute of Technology","host_organization_lineage":["https://openalex.org/I130701444"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.865.2903","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.865.2903","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://adb.sagepub.com/content/16/6/400.full.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W121239838","https://openalex.org/W1499408472","https://openalex.org/W1507591516","https://openalex.org/W1515851193","https://openalex.org/W1564326237","https://openalex.org/W1608391772","https://openalex.org/W1617610651","https://openalex.org/W1769460679","https://openalex.org/W1777239053","https://openalex.org/W1914583973","https://openalex.org/W1971010625","https://openalex.org/W1976051517","https://openalex.org/W1996847178","https://openalex.org/W2010835231","https://openalex.org/W2075323224","https://openalex.org/W2079247031","https://openalex.org/W2111116266","https://openalex.org/W2113913482","https://openalex.org/W2116339921","https://openalex.org/W2121863487","https://openalex.org/W2124175081","https://openalex.org/W2125523964","https://openalex.org/W2130750514","https://openalex.org/W2151803317","https://openalex.org/W2158969944","https://openalex.org/W2171426878","https://openalex.org/W2198041288","https://openalex.org/W2583413467","https://openalex.org/W2998349125","https://openalex.org/W3103379718","https://openalex.org/W3139377883","https://openalex.org/W4214717370"],"related_works":["https://openalex.org/W3107204728","https://openalex.org/W4287591324","https://openalex.org/W3108503355","https://openalex.org/W3090555870","https://openalex.org/W4226420367","https://openalex.org/W2962876041","https://openalex.org/W4200081355","https://openalex.org/W3022820045","https://openalex.org/W3081944365","https://openalex.org/W2135179174"],"abstract_inverted_index":{"In":[0],"this":[1],"article,":[2],"we":[3],"explore":[4],"an":[5],"evolutionary":[6],"approach":[7,26,117],"to":[8,27,37,49,62,135],"the":[9,29,50,53,75,79,82,86,90,98,103,115,121],"optimization":[10],"of":[11,32,55,81,93],"potential-based":[12],"shaping":[13,127],"rewards":[14,21,45,95,128],"and":[15,41,97,107,129],"meta-parameters":[16,72],"in":[17,52,109,118,133],"reinforcement":[18,33],"learning.":[19],"Shaping":[20,44],"is":[22],"a":[23,70,138],"frequently":[24],"used":[25],"increase":[28],"learning":[30,61,66,76,87],"performance":[31,40],"learning,":[34],"with":[35],"regards":[36],"both":[38],"initial":[39],"convergence":[42],"speed.":[43],"provide":[46],"additional":[47],"knowledge":[48],"agent":[51],"form":[54],"richer":[56],"reward":[57],"signals,":[58],"which":[59],"guide":[60],"high-rewarding":[63],"states.":[64],"Reinforcement":[65],"depends":[67],"critically":[68],"on":[69],"few":[71],"that":[73,101],"modulate":[74],"updates":[77],"or":[78],"exploration":[80,106],"environment,":[83],"such":[84],"as":[85],"rate":[88],"\u03b1,":[89],"discount":[91],"factor":[92],"future":[94],"\u03b3,":[96],"temperature":[99],"\u03c4":[100],"controls":[102],"trade-off":[104],"between":[105],"exploitation":[108],"softmax":[110],"action":[111],"selection.":[112],"We":[113,124],"validate":[114],"proposed":[116],"simulation":[119],"using":[120,137],"mountain-car":[122],"task.":[123,141],"also":[125],"transfer":[126],"meta-parameters,":[130],"evolutionarily":[131],"obtained":[132],"simulation,":[134],"hardware,":[136],"robotic":[139],"foraging":[140]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
