{"id":"https://openalex.org/W3158250864","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533996","title":"Constraint-Guided Reinforcement Learning: Augmenting the Agent-Environment-Interaction","display_name":"Constraint-Guided Reinforcement Learning: Augmenting the Agent-Environment-Interaction","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3158250864","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533996","mag":"3158250864"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9533996","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533996","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.11918","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005226780","display_name":"Helge Spieker","orcid":"https://orcid.org/0000-0003-2494-4279"},"institutions":[{"id":"https://openalex.org/I2799829267","display_name":"Simula Research Laboratory","ror":"https://ror.org/00vn06n10","country_code":"NO","type":"facility","lineage":["https://openalex.org/I2799829267"]}],"countries":["NO"],"is_corresponding":true,"raw_author_name":"Helge Spieker","raw_affiliation_strings":["Simula Research Laboratory,Fornebu,Norway","Simula Research Laboratory;"],"affiliations":[{"raw_affiliation_string":"Simula Research Laboratory,Fornebu,Norway","institution_ids":["https://openalex.org/I2799829267"]},{"raw_affiliation_string":"Simula Research Laboratory;","institution_ids":["https://openalex.org/I2799829267"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5005226780"],"corresponding_institution_ids":["https://openalex.org/I2799829267"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05284396,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8695904016494751},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7565563917160034},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.6610028743743896},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.562464714050293},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5233563780784607},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.48960596323013306},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4736432135105133},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44279155135154724},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.42407935857772827},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.41756346821784973},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.15807583928108215},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11026349663734436},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.08967837691307068}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8695904016494751},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7565563917160034},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.6610028743743896},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.562464714050293},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5233563780784607},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.48960596323013306},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4736432135105133},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44279155135154724},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.42407935857772827},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.41756346821784973},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.15807583928108215},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11026349663734436},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.08967837691307068},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9533996","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533996","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.11918","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.11918","pdf_url":"https://arxiv.org/pdf/2104.11918","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3158250864","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2104.11918.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2104.11918","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.11918","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.11918","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.11918","pdf_url":"https://arxiv.org/pdf/2104.11918","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4140855681","display_name":null,"funder_award_id":"825619","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1845972764","https://openalex.org/W1977655452","https://openalex.org/W2097113539","https://openalex.org/W2607264901","https://openalex.org/W2736601468","https://openalex.org/W2787908307","https://openalex.org/W2892364115","https://openalex.org/W2942608247","https://openalex.org/W2962803570","https://openalex.org/W2962991582","https://openalex.org/W2963277051","https://openalex.org/W2963575966","https://openalex.org/W2963771109","https://openalex.org/W2964043796","https://openalex.org/W2964340170","https://openalex.org/W2964627913","https://openalex.org/W2966735560","https://openalex.org/W3091691175","https://openalex.org/W3100789280","https://openalex.org/W3123298421","https://openalex.org/W6692846177","https://openalex.org/W6718092244","https://openalex.org/W6736495275","https://openalex.org/W6737893269","https://openalex.org/W6744502524","https://openalex.org/W6751535212","https://openalex.org/W6755612348","https://openalex.org/W6779886872","https://openalex.org/W6807360898"],"related_works":["https://openalex.org/W3201285065","https://openalex.org/W2973029245","https://openalex.org/W2757927221","https://openalex.org/W2135995480","https://openalex.org/W1998786997","https://openalex.org/W2944481713","https://openalex.org/W2082244867","https://openalex.org/W3093342898","https://openalex.org/W3198127929","https://openalex.org/W3211667558","https://openalex.org/W3112977404","https://openalex.org/W3145768065","https://openalex.org/W2588415587","https://openalex.org/W3092156675","https://openalex.org/W2994642062","https://openalex.org/W41069182","https://openalex.org/W3156093577","https://openalex.org/W2009988656","https://openalex.org/W1998727078","https://openalex.org/W2787145444"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"agents":[3,22,61],"have":[4],"great":[5],"successes":[6],"in":[7,51],"solving":[8],"tasks":[9],"with":[10,68,155],"large":[11],"observation":[12],"and":[13,25,36,91,138,150,169],"action":[14],"spaces":[15],"from":[16],"limited":[17],"feedback.":[18],"Still,":[19],"training":[20],"the":[21,31,42,48,57,63,74,81,84,99,107,111,115,117,135],"is":[23,34,87,102],"data-intensive":[24],"there":[26],"are":[27],"no":[28],"guarantees":[29],"that":[30,94,162],"learned":[32],"behavior":[33],"safe":[35,78],"does":[37,164],"not":[38,103],"violate":[39],"rules":[40],"of":[41,59,65],"environment,":[43],"which":[44],"has":[45],"limitations":[46],"for":[47,131],"practical":[49],"deployment":[50],"real-world":[52],"scenarios.":[53],"This":[54],"paper":[55],"discusses":[56],"engineering":[58],"reliable":[60],"via":[62],"integration":[64,129],"deep":[66],"RL":[67,75,85,108,136],"constraint-based":[69],"augmentation":[70],"models":[71,119],"to":[72,89,97,123],"guide":[73],"agent":[76,86,109],"towards":[77],"behavior.":[79],"Within":[80],"constraints":[82],"set,":[83],"free":[88],"adapt":[90],"explore,":[92],"such":[93],"its":[95],"effectiveness":[96],"solve":[98],"given":[100],"problem":[101],"hindered.":[104],"However,":[105],"once":[106],"leaves":[110],"space":[112],"defined":[113],"by":[114],"constraints,":[116],"outside":[118],"can":[120],"provide":[121,166],"guidance":[122,133],"still":[124],"work":[125],"reliably.":[126],"We":[127],"discuss":[128],"points":[130],"constraint":[132],"within":[134],"process":[137],"perform":[139],"experiments":[140],"on":[141],"two":[142],"case":[143],"studies:":[144],"a":[145,151],"strictly":[146],"constrained":[147],"card":[148],"game":[149],"grid":[152],"world":[153],"environment":[154],"additional":[156],"combinatorial":[157],"subgoals.":[158],"Our":[159],"results":[160],"show":[161],"constraint-guidance":[163],"both":[165],"reliability":[167],"improvements":[168],"safer":[170],"behavior,":[171],"as":[172,174],"well":[173],"accelerated":[175],"training.":[176]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
