{"id":"https://openalex.org/W4213063346","doi":"https://doi.org/10.1145/3503823.3503905","title":"Shaping the Behavior of Reinforcement Learning Agents","display_name":"Shaping the Behavior of Reinforcement Learning Agents","publication_year":2021,"publication_date":"2021-11-26","ids":{"openalex":"https://openalex.org/W4213063346","doi":"https://doi.org/10.1145/3503823.3503905"},"language":"en","primary_location":{"id":"doi:10.1145/3503823.3503905","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503823.3503905","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"25th Pan-Hellenic Conference on Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.10375886","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067441830","display_name":"George Sidiropoulos","orcid":"https://orcid.org/0000-0002-3722-0934"},"institutions":[{"id":"https://openalex.org/I4210156054","display_name":"Athena Research and Innovation Center In Information Communication & Knowledge Technologies","ror":"https://ror.org/0576by029","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210156054"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"George Sidiropoulos","raw_affiliation_strings":["Athena-Research and Innovation Center in Information, Communication and Knowledge Technologies, Greece"],"affiliations":[{"raw_affiliation_string":"Athena-Research and Innovation Center in Information, Communication and Knowledge Technologies, Greece","institution_ids":["https://openalex.org/I4210156054"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016380733","display_name":"Chairi Kiourt","orcid":"https://orcid.org/0000-0001-8501-8899"},"institutions":[{"id":"https://openalex.org/I4210156054","display_name":"Athena Research and Innovation Center In Information Communication & Knowledge Technologies","ror":"https://ror.org/0576by029","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210156054"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Chairi Kiourt","raw_affiliation_strings":["Athena-Research and Innovation Center in Information, Communication and Knowledge Technologies, Greece"],"affiliations":[{"raw_affiliation_string":"Athena-Research and Innovation Center in Information, Communication and Knowledge Technologies, Greece","institution_ids":["https://openalex.org/I4210156054"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058515747","display_name":"Vasileios Sevetlidis","orcid":"https://orcid.org/0000-0001-9348-8786"},"institutions":[{"id":"https://openalex.org/I4210156054","display_name":"Athena Research and Innovation Center In Information Communication & Knowledge Technologies","ror":"https://ror.org/0576by029","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210156054"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Vasileios Sevetlidis","raw_affiliation_strings":["Athena-Research and Innovation Center in Information, Communication and Knowledge Technologies, Greece"],"affiliations":[{"raw_affiliation_string":"Athena-Research and Innovation Center in Information, Communication and Knowledge Technologies, Greece","institution_ids":["https://openalex.org/I4210156054"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003731715","display_name":"George Pavlidis","orcid":"https://orcid.org/0000-0002-9909-1584"},"institutions":[{"id":"https://openalex.org/I4210156054","display_name":"Athena Research and Innovation Center In Information Communication & Knowledge Technologies","ror":"https://ror.org/0576by029","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210156054"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"George Pavlidis","raw_affiliation_strings":["Athena-Research and Innovation Center in Information, Communication and Knowledge Technologies, Greece"],"affiliations":[{"raw_affiliation_string":"Athena-Research and Innovation Center in Information, Communication and Knowledge Technologies, Greece","institution_ids":["https://openalex.org/I4210156054"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5067441830"],"corresponding_institution_ids":["https://openalex.org/I4210156054"],"apc_list":null,"apc_paid":null,"fwci":0.4199,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71227157,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"448","last_page":"453"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11252","display_name":"Evolutionary Game Theory and Cooperation","score":0.9642999768257141,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9589999914169312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8172056674957275},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6723980903625488},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5628563761711121},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5367732048034668},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5093997120857239},{"id":"https://openalex.org/keywords/swarm-behaviour","display_name":"Swarm behaviour","score":0.4500657320022583},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4398260712623596},{"id":"https://openalex.org/keywords/autonomous-agent","display_name":"Autonomous agent","score":0.4154934883117676},{"id":"https://openalex.org/keywords/crowd-simulation","display_name":"Crowd simulation","score":0.4113743305206299},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.23842516541481018},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.14687681198120117}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8172056674957275},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6723980903625488},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5628563761711121},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5367732048034668},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5093997120857239},{"id":"https://openalex.org/C181335050","wikidata":"https://www.wikidata.org/wiki/Q14915018","display_name":"Swarm behaviour","level":2,"score":0.4500657320022583},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4398260712623596},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.4154934883117676},{"id":"https://openalex.org/C45617602","wikidata":"https://www.wikidata.org/wiki/Q465266","display_name":"Crowd simulation","level":3,"score":0.4113743305206299},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.23842516541481018},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.14687681198120117},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C2777852691","wikidata":"https://www.wikidata.org/wiki/Q13430821","display_name":"Crowds","level":2,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3503823.3503905","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503823.3503905","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"25th Pan-Hellenic Conference on Informatics","raw_type":"proceedings-article"},{"id":"doi:10.5281/zenodo.10375886","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10375886","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.10375886","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10375886","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5099999904632568,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G3875748329","display_name":null,"funder_award_id":"European Regional Development Fund","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G6660225447","display_name":null,"funder_award_id":"co-financed","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G8538941428","display_name":null,"funder_award_id":"Regional Development Fund","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2026220725","https://openalex.org/W2041911815","https://openalex.org/W2560675820","https://openalex.org/W2563112808","https://openalex.org/W2810602713","https://openalex.org/W2855488022","https://openalex.org/W3072315125","https://openalex.org/W3099518626","https://openalex.org/W3104303852"],"related_works":["https://openalex.org/W4288714711","https://openalex.org/W3200708550","https://openalex.org/W2771637876","https://openalex.org/W2736680465","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W1562393301","https://openalex.org/W2494202692","https://openalex.org/W2510991340","https://openalex.org/W2259224485"],"abstract_inverted_index":{"With":[0],"the":[1,26,39,62,78,109,113,120,123,126],"advent":[2],"of":[3,13,28,41,64,77,89,92,125],"machine":[4],"learning":[5,66],"and":[6,21,47,71,122],"agent-based":[7],"approaches,":[8],"behavior-shaping":[9],"in":[10,38,68,84,105],"environments":[11],"composed":[12],"several":[14],"autonomous":[15],"entities":[16],"has":[17],"become":[18],"a":[19,55,85],"popular":[20],"active":[22],"research":[23],"field":[24],"for":[25,60],"development":[27],"unique":[29],"realistic":[30,129],"behaviors.":[31],"Realistic":[32],"simulations":[33],"have":[34],"been":[35],"particularly":[36],"studied":[37],"fields":[40],"crowd":[42],"management,":[43],"swarm":[44],"behavior":[45,63,130],"analysis":[46],"civilization":[48],"simulation.":[49],"In":[50],"this":[51],"study,":[52],"we":[53],"present":[54],"new":[56],"dynamic":[57],"rewarding":[58,80],"approach":[59,81],"shaping":[61],"reinforcement":[65],"agents":[67],"mixed":[69],"(cooperative":[70],"competitive)":[72],"multi-agent":[73],"environments.":[74],"The":[75],"evaluation":[76],"proposed":[79],"is":[82],"tested":[83],"developed":[86],"3D":[87],"environment":[88],"two":[90],"groups":[91],"ancient":[93],"Greek":[94],"warriors":[95],"fighting":[96],"inside":[97],"an":[98],"octagonal":[99],"arena,":[100],"testing":[101],"different":[102],"agent":[103],"behaviors":[104,116],"various":[106],"scenarios.":[107],"Interestingly,":[108],"results":[110],"reveal":[111],"that":[112],"trained":[114],"agents\u2019":[115],"vary":[117],"based":[118],"on":[119],"situations":[121],"constraints":[124],"environment,":[127],"resembling":[128],"variations.":[131]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
