{"id":"https://openalex.org/W2795921402","doi":"https://doi.org/10.1109/devlrn.2017.8329799","title":"Parallel reward and punishment control in humans and robots: Safe reinforcement learning using the MaxPain algorithm","display_name":"Parallel reward and punishment control in humans and robots: Safe reinforcement learning using the MaxPain algorithm","publication_year":2017,"publication_date":"2017-09-01","ids":{"openalex":"https://openalex.org/W2795921402","doi":"https://doi.org/10.1109/devlrn.2017.8329799","mag":"2795921402"},"language":"en","primary_location":{"id":"doi:10.1109/devlrn.2017.8329799","is_oa":false,"landing_page_url":"https://doi.org/10.1109/devlrn.2017.8329799","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 Joint IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL-EpiRob)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.17863/cam.33366","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068305732","display_name":"Stefan Elfwing","orcid":"https://orcid.org/0000-0001-6689-1000"},"institutions":[{"id":"https://openalex.org/I4210104143","display_name":"Advanced Telecommunications Research Institute International","ror":"https://ror.org/01pe1d703","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210104143"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Stefan Elfwing","raw_affiliation_strings":["Dept. of Brain Robot Interface, ATR Computational Neuroscience Laboratories, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Dept. of Brain Robot Interface, ATR Computational Neuroscience Laboratories, Kyoto, Japan","institution_ids":["https://openalex.org/I4210104143"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011993307","display_name":"Ben Seymour","orcid":"https://orcid.org/0000-0003-1724-5832"},"institutions":[{"id":"https://openalex.org/I4210104143","display_name":"Advanced Telecommunications Research Institute International","ror":"https://ror.org/01pe1d703","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210104143"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ben Seymour","raw_affiliation_strings":["Dept. of Brain Robot Interface, ATR Computational Neuroscience Laboratories, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Dept. of Brain Robot Interface, ATR Computational Neuroscience Laboratories, Kyoto, Japan","institution_ids":["https://openalex.org/I4210104143"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5068305732"],"corresponding_institution_ids":["https://openalex.org/I4210104143"],"apc_list":null,"apc_paid":null,"fwci":1.8496,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.85662209,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"140","last_page":"147"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10042","display_name":"Neural and Behavioral Psychology Studies","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10042","display_name":"Neural and Behavioral Psychology Studies","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10056","display_name":"Neurotransmitter Receptor Influence on Behavior","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/2804","display_name":"Cellular and Molecular Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13283","display_name":"Mental Health Research Topics","score":0.9828000068664551,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7910721302032471},{"id":"https://openalex.org/keywords/punishment","display_name":"Punishment (psychology)","score":0.7834137678146362},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6769515872001648},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6617531776428223},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.622205376625061},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.6125504970550537},{"id":"https://openalex.org/keywords/argument","display_name":"Argument (complex analysis)","score":0.5629450678825378},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5140730142593384},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4908927083015442},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.48077696561813354},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.47803792357444763},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.44735491275787354},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.43848296999931335},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.24847954511642456},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.1340140998363495},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11604335904121399},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.09907299280166626}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7910721302032471},{"id":"https://openalex.org/C2779295839","wikidata":"https://www.wikidata.org/wiki/Q3544090","display_name":"Punishment (psychology)","level":2,"score":0.7834137678146362},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6769515872001648},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6617531776428223},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.622205376625061},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.6125504970550537},{"id":"https://openalex.org/C98184364","wikidata":"https://www.wikidata.org/wiki/Q1780131","display_name":"Argument (complex analysis)","level":2,"score":0.5629450678825378},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5140730142593384},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4908927083015442},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.48077696561813354},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.47803792357444763},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.44735491275787354},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.43848296999931335},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.24847954511642456},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.1340140998363495},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11604335904121399},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.09907299280166626},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/devlrn.2017.8329799","is_oa":false,"landing_page_url":"https://doi.org/10.1109/devlrn.2017.8329799","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 Joint IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL-EpiRob)","raw_type":"proceedings-article"},{"id":"pmh:oai:generic.eprints.org:933647","is_oa":false,"landing_page_url":"http://publications.eng.cam.ac.uk/933647/","pdf_url":null,"source":{"id":"https://openalex.org/S4406922847","display_name":"Cambridge University Engineering Department Publications Database","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:www.repository.cam.ac.uk:1810/286048","is_oa":false,"landing_page_url":"https://www.repository.cam.ac.uk/handle/1810/286048","pdf_url":null,"source":{"id":"https://openalex.org/S4306401777","display_name":"Apollo (University of Cambridge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I241749","host_organization_name":"University of Cambridge","host_organization_lineage":["https://openalex.org/I241749"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.17863/cam.33366","is_oa":true,"landing_page_url":"https://doi.org/10.17863/cam.33366","pdf_url":null,"source":{"id":"https://openalex.org/S7407050737","display_name":"Apollo","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.17863/cam.33366","is_oa":true,"landing_page_url":"https://doi.org/10.17863/cam.33366","pdf_url":null,"source":{"id":"https://openalex.org/S7407050737","display_name":"Apollo","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.550000011920929,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320311904","display_name":"Wellcome Trust","ror":"https://ror.org/029chgv08"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W34142835","https://openalex.org/W1008056019","https://openalex.org/W1840625103","https://openalex.org/W1845972764","https://openalex.org/W1865886258","https://openalex.org/W1983180567","https://openalex.org/W2002865226","https://openalex.org/W2004167381","https://openalex.org/W2007414406","https://openalex.org/W2023732657","https://openalex.org/W2037226859","https://openalex.org/W2046713808","https://openalex.org/W2048732459","https://openalex.org/W2060846151","https://openalex.org/W2081030963","https://openalex.org/W2082070038","https://openalex.org/W2083058253","https://openalex.org/W2084912121","https://openalex.org/W2091161547","https://openalex.org/W2091637698","https://openalex.org/W2093820536","https://openalex.org/W2096452841","https://openalex.org/W2099341654","https://openalex.org/W2100677568","https://openalex.org/W2101075098","https://openalex.org/W2111768113","https://openalex.org/W2113913482","https://openalex.org/W2116085129","https://openalex.org/W2117428849","https://openalex.org/W2117726420","https://openalex.org/W2121863487","https://openalex.org/W2127959107","https://openalex.org/W2128521145","https://openalex.org/W2129478155","https://openalex.org/W2150643588","https://openalex.org/W2153960487","https://openalex.org/W2169206416","https://openalex.org/W2338986187","https://openalex.org/W2478951598","https://openalex.org/W2522172843","https://openalex.org/W3041202696","https://openalex.org/W3104013016","https://openalex.org/W3106238320","https://openalex.org/W3125893104","https://openalex.org/W3139377883","https://openalex.org/W3171586585","https://openalex.org/W4214717370","https://openalex.org/W4232292906","https://openalex.org/W4301831348","https://openalex.org/W6601411472","https://openalex.org/W6639175102","https://openalex.org/W6677435986","https://openalex.org/W6792155000"],"related_works":["https://openalex.org/W2953205341","https://openalex.org/W2092643327","https://openalex.org/W235065745","https://openalex.org/W2029935773","https://openalex.org/W2787754950","https://openalex.org/W1572215850","https://openalex.org/W1985775355","https://openalex.org/W2352115286","https://openalex.org/W2476350415","https://openalex.org/W4256172809"],"abstract_inverted_index":{"An":[0],"important":[1],"issue":[2],"in":[3,43,61,110,138,146,177,221,237,239],"reinforcement":[4],"learning":[5,26,106,153,201],"systems":[6,18],"for":[7,19,56,84,228],"autonomous":[8],"agents":[9],"is":[10,80],"whether":[11,78],"it":[12,72],"makes":[13],"sense":[14],"to":[15,65,77,155,162,206],"have":[16],"separate":[17,54],"predicting":[20],"rewards":[21],"and":[22,27,87,140,164,185,200,223,235,242],"punishments.":[23],"In":[24,193,210],"robotics,":[25],"control":[28],"are":[29,202],"typically":[30],"achieved":[31],"by":[32],"a":[33,53,95,121,126,147,182,225],"single":[34],"controller,":[35],"with":[36],"punishments":[37],"coded":[38],"as":[39,76,157,159,233],"negative":[40,70],"rewards.":[41],"However":[42],"biological":[44,66],"systems,":[45],"some":[46],"evidence":[47],"suggests":[48],"that":[49,105],"the":[50,74,103,116,143,189,212,215],"brain":[51],"has":[52],"system":[55],"punishment.":[57],"Although":[58],"this":[59,99,167,172],"may":[60],"part":[62],"be":[63],"due":[64],"constraints":[67],"of":[68,188,217],"implementing":[69],"quantities,":[71],"raises":[73],"question":[75],"there":[79],"any":[81],"computational":[82],"rationale":[83],"keeping":[85],"reward":[86],"punishment":[88,219,231],"prediction":[89,220],"operationally":[90],"distinct.":[91],"Here":[92],"we":[93,130,195],"outline":[94],"basic":[96],"argument":[97],"supporting":[98],"idea,":[100],"based":[101],"on":[102],"proposition":[104],"best-case":[107],"predictions":[108,137,145],"(as":[109],"Q-learning)":[111],"does":[112],"not":[113,161],"always":[114],"achieve":[115],"safest":[117],"behaviour.":[118,209],"We":[119,169],"introduce":[120],"modified":[122],"RL":[123,149,179],"scheme":[124,173],"involving":[125],"new":[127],"algorithm":[128],"which":[129,134],"call":[131],"'MaxPain'":[132],"-":[133],"back-ups":[135],"worst-case":[136],"parallel,":[139],"then":[141,165],"scales":[142],"two":[144],"multiattribute":[148],"policy.":[150],"i.e.":[151],"independently":[152],"`what":[154,160],"do'":[156,163],"well":[158],"combining":[166],"information.":[168],"show":[170],"how":[171,197],"can":[174],"improve":[175],"performance":[176],"benchmark":[178],"environments,":[180],"including":[181],"grid-world":[183],"experiment":[184],"delayed":[186],"version":[187],"mountain":[190],"car":[191],"experiment.":[192],"particular,":[194],"demonstrate":[196],"early":[198],"exploration":[199],"substantially":[203],"improved,":[204],"leading":[205],"much":[207],"`safer'":[208],"conclusion,":[211],"results":[213],"illustrate":[214],"importance":[216],"independent":[218],"RL,":[222],"provide":[224],"testable":[226],"framework":[227],"better":[229],"understanding":[230],"(such":[232],"pain)":[234],"avoidance":[236],"humans,":[238],"both":[240],"health":[241],"disease.":[243]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
