{"id":"https://openalex.org/W3200681020","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533682","title":"Reinforcement Learning with Potential Functions Trained to Discriminate Good and Bad States","display_name":"Reinforcement Learning with Potential Functions Trained to Discriminate Good and Bad States","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3200681020","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533682","mag":"3200681020"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9533682","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533682","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.rug.nl/en/publications/9830c7fe-bcd3-4e91-8a39-0a015aaf141d","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048747705","display_name":"Yifei Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Yifei Chen","raw_affiliation_strings":["Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008655688","display_name":"Hamidreza Kasaei","orcid":"https://orcid.org/0000-0001-9408-7730"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Hamidreza Kasaei","raw_affiliation_strings":["Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028858025","display_name":"Lambert Schomaker","orcid":"https://orcid.org/0000-0003-2351-930X"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Lambert Schomaker","raw_affiliation_strings":["Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060596453","display_name":"Marco Wiering","orcid":"https://orcid.org/0000-0003-4331-7537"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Marco Wiering","raw_affiliation_strings":["Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5048747705"],"corresponding_institution_ids":["https://openalex.org/I169381384"],"apc_list":null,"apc_paid":null,"fwci":0.14,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.56437208,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9776999950408936,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9166933298110962},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7549070119857788},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6754293441772461},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.6083698272705078},{"id":"https://openalex.org/keywords/perceptron","display_name":"Perceptron","score":0.556540310382843},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.49101710319519043},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.48266294598579407},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4483613967895508},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.42126256227493286},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.38024821877479553},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08500638604164124}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9166933298110962},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7549070119857788},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6754293441772461},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.6083698272705078},{"id":"https://openalex.org/C60908668","wikidata":"https://www.wikidata.org/wiki/Q690207","display_name":"Perceptron","level":3,"score":0.556540310382843},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.49101710319519043},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.48266294598579407},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4483613967895508},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.42126256227493286},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.38024821877479553},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08500638604164124},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9533682","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533682","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.rug.nl:openaire/9830c7fe-bcd3-4e91-8a39-0a015aaf141d","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/9830c7fe-bcd3-4e91-8a39-0a015aaf141d","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Chen, Y, Kasaei, H, Schomaker, L & Wiering, M 2021, Reinforcement Learning with Potential Functions Trained to Discriminate Good and Bad States. in 2021 International Joint Conference on Neural Networks (IJCNN)., 9533682, IEEE, pp. 1-7, 2021 International Joint Conference on Neural Networks (IJCNN), 18/07/2021. https://doi.org/10.1109/IJCNN52387.2021.9533682","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.rug.nl:openaire/9830c7fe-bcd3-4e91-8a39-0a015aaf141d","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/9830c7fe-bcd3-4e91-8a39-0a015aaf141d","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Chen, Y, Kasaei, H, Schomaker, L & Wiering, M 2021, Reinforcement Learning with Potential Functions Trained to Discriminate Good and Bad States. in 2021 International Joint Conference on Neural Networks (IJCNN)., 9533682, IEEE, pp. 1-7, 2021 International Joint Conference on Neural Networks (IJCNN), 18/07/2021. https://doi.org/10.1109/IJCNN52387.2021.9533682","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"score":0.6600000262260437,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G7023170290","display_name":null,"funder_award_id":"201806320353","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G8589651859","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"}],"funders":[{"id":"https://openalex.org/F4320320933","display_name":"Rijksuniversiteit Groningen","ror":"https://ror.org/012p63287"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320337373","display_name":"Center for Information Technology","ror":"https://ror.org/03jh5a977"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W1130790960","https://openalex.org/W1487219635","https://openalex.org/W1499408472","https://openalex.org/W1522301498","https://openalex.org/W1547136369","https://openalex.org/W1553476745","https://openalex.org/W1626977535","https://openalex.org/W1641643976","https://openalex.org/W1777239053","https://openalex.org/W1977655452","https://openalex.org/W1996847178","https://openalex.org/W2026724928","https://openalex.org/W2041367235","https://openalex.org/W2065356613","https://openalex.org/W2095564494","https://openalex.org/W2103626435","https://openalex.org/W2121863487","https://openalex.org/W2130750514","https://openalex.org/W2145339207","https://openalex.org/W2147750403","https://openalex.org/W2151382427","https://openalex.org/W2156387975","https://openalex.org/W2158969944","https://openalex.org/W2161009228","https://openalex.org/W2164419340","https://openalex.org/W2165131254","https://openalex.org/W2173564293","https://openalex.org/W2202549229","https://openalex.org/W2218252352","https://openalex.org/W2491675558","https://openalex.org/W2552800523","https://openalex.org/W2561776174","https://openalex.org/W2580909119","https://openalex.org/W2583528914","https://openalex.org/W2596982695","https://openalex.org/W2803973384","https://openalex.org/W2897039614","https://openalex.org/W2899205164","https://openalex.org/W2914261249","https://openalex.org/W2921889099","https://openalex.org/W2949475445","https://openalex.org/W2951799221","https://openalex.org/W2963160877","https://openalex.org/W2964067469","https://openalex.org/W2964121744","https://openalex.org/W3003040443","https://openalex.org/W3011120880","https://openalex.org/W3103379718","https://openalex.org/W3128125614","https://openalex.org/W4214717370","https://openalex.org/W4214807090","https://openalex.org/W4233696721","https://openalex.org/W4234438384","https://openalex.org/W4293743321","https://openalex.org/W4297576238","https://openalex.org/W6629027733","https://openalex.org/W6631190155","https://openalex.org/W6636799442","https://openalex.org/W6636868823","https://openalex.org/W6638088447","https://openalex.org/W6682205418","https://openalex.org/W6682889407","https://openalex.org/W6684159546","https://openalex.org/W6685444567","https://openalex.org/W6687713970","https://openalex.org/W6689158983","https://openalex.org/W6722970529","https://openalex.org/W6729861744","https://openalex.org/W6730641667","https://openalex.org/W6732951832","https://openalex.org/W6735033012","https://openalex.org/W6756303580","https://openalex.org/W6758978475","https://openalex.org/W6790319397"],"related_works":["https://openalex.org/W2742483371","https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3087814763","https://openalex.org/W4376605461","https://openalex.org/W4400868993","https://openalex.org/W2361647908","https://openalex.org/W2537866915"],"abstract_inverted_index":{"Reward":[0],"shaping":[1,27,32,52],"is":[2,17,82,114,136,151],"an":[3,68,104,149],"efficient":[4],"way":[5],"to":[6,21,60,130,155,162],"incorporate":[7],"domain":[8],"knowledge":[9,24],"into":[10],"a":[11,62,71,174],"reinforcement":[12,72,203],"learning":[13,30,73,204],"agent.":[14],"Nev-ertheless,":[15],"it":[16],"unpractical":[18],"and":[19,91,108,126,179],"inconvenient":[20],"require":[22],"prior":[23],"for":[25],"designing":[26],"rewards.":[28],"Therefore,":[29],"the":[31,36,49,76,79,97,118,132,140,167,192,198,201],"reward":[33,51],"function":[34,64,81,101,122,143,196],"by":[35,84],"agent":[37,69,105],"during":[38],"training":[39,67,103],"could":[40],"be":[41],"more":[42],"effective.":[43],"In":[44,75,147],"this":[45],"paper,":[46],"based":[47],"on":[48],"potential-based":[50],"framework,":[53],"which":[54,116,138],"guarantees":[55],"policy":[56],"invariance,":[57],"we":[58],"propose":[59],"learn":[61],"potential":[63,80,100,121,142,195],"concurrently":[65],"with":[66,106,124,144],"using":[70],"algorithm.":[74],"proposed":[77,98,193],"method,":[78],"trained":[83],"examining":[85],"states":[86,158],"that":[87,191],"occur":[88],"in":[89,92,153,171],"good":[90],"bad":[93],"episodes.":[94],"We":[95,165],"apply":[96],"adaptive":[99,194],"while":[102],"Q-learning":[107,125],"develop":[109],"two":[110],"novel":[111,141],"algorithms.":[112,205],"One":[113],"APF-QMLP,":[115],"applies":[117],"good/bad":[119],"state":[120,185],"combined":[123],"multi-layer":[127],"perceptrons":[128],"(MLPs)":[129],"estimate":[131],"Q-function.":[133],"The":[134,187],"other":[135],"APF-Dueling-DQN,":[137],"combines":[139],"Dueling":[145],"DQN.":[146],"particular,":[148],"autoencoder":[150],"adopted":[152],"APF-Dueling-DQN":[154],"map":[156],"image":[157],"from":[159],"Atari":[160],"games":[161],"hash":[163],"codes.":[164],"evaluated":[166],"created":[168],"algorithms":[169],"empirically":[170],"four":[172],"environments:":[173],"six-room":[175],"maze,":[176],"CartPole,":[177],"Acrobot,":[178],"Ms-Pacman,":[180],"involving":[181],"low-dimensional":[182],"or":[183],"high-dimensional":[184],"spaces.":[186],"experimental":[188],"results":[189],"showed":[190],"improved":[197],"performances":[199],"of":[200],"selected":[202]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}