{"id":"https://openalex.org/W3151813343","doi":"https://doi.org/10.1007/s00521-021-06184-3","title":"Opponent Learning Awareness and Modelling in Multi-Objective Normal Form Games","display_name":"Opponent Learning Awareness and Modelling in Multi-Objective Normal Form Games","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3151813343","doi":"https://doi.org/10.1007/s00521-021-06184-3","mag":"3151813343"},"language":"en","primary_location":{"id":"pmh:oai:vubissmart:VUBISSMART:2000:151988","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s00521-021-06184-3","pdf_url":null,"source":{"id":"https://openalex.org/S4306402573","display_name":"VUBIR (Vrije Universiteit Brussel)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I13469542","host_organization_name":"Vrije Universiteit Brussel","host_organization_lineage":["https://openalex.org/I13469542"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"publishedVersion"},"type":"article","indexed_in":[],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008556334","display_name":"Roxana R\u0103dulescu","orcid":"https://orcid.org/0000-0003-1446-5514"},"institutions":[{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Roxana Radulescu","raw_affiliation_strings":["Artificial Intelligence Lab, Vrije Universiteit Brussel, Brussels, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Lab, Vrije Universiteit Brussel, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5008556334"],"corresponding_institution_ids":["https://openalex.org/I13469542"],"apc_list":null,"apc_paid":null,"fwci":2.0989,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.89145542,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10646","display_name":"Experimental Behavioral Economics Studies","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9616000056266785,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fictitious-play","display_name":"Fictitious play","score":0.7364742755889893},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7301448583602905},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6948930025100708},{"id":"https://openalex.org/keywords/stochastic-game","display_name":"Stochastic game","score":0.6889911890029907},{"id":"https://openalex.org/keywords/adversary","display_name":"Adversary","score":0.6303627490997314},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.5161231756210327},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5083569884300232},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3637327551841736},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.33098965883255005},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17845743894577026}],"concepts":[{"id":"https://openalex.org/C145071142","wikidata":"https://www.wikidata.org/wiki/Q1411116","display_name":"Fictitious play","level":3,"score":0.7364742755889893},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7301448583602905},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6948930025100708},{"id":"https://openalex.org/C22171661","wikidata":"https://www.wikidata.org/wiki/Q1074380","display_name":"Stochastic game","level":2,"score":0.6889911890029907},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.6303627490997314},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.5161231756210327},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5083569884300232},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3637327551841736},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.33098965883255005},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17845743894577026},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"pmh:oai:vubissmart:VUBISSMART:2000:151988","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s00521-021-06184-3","pdf_url":null,"source":{"id":"https://openalex.org/S4306402573","display_name":"VUBIR (Vrije Universiteit Brussel)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I13469542","host_organization_name":"Vrije Universiteit Brussel","host_organization_lineage":["https://openalex.org/I13469542"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G7451411756","display_name":null,"funder_award_id":"1S47617N","funder_id":"https://openalex.org/F4320321730","funder_display_name":"Fonds Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320321587","display_name":"Vlaamse Overheid","ror":"https://ror.org/04qxsrb28"},{"id":"https://openalex.org/F4320321730","display_name":"Fonds Wetenschappelijk Onderzoek","ror":"https://ror.org/03qtxy027"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1482010531","https://openalex.org/W1605188341","https://openalex.org/W1965520710","https://openalex.org/W1982813377","https://openalex.org/W2052766368","https://openalex.org/W2053488431","https://openalex.org/W2056099894","https://openalex.org/W2061562262","https://openalex.org/W2102660061","https://openalex.org/W2104602264","https://openalex.org/W2119717200","https://openalex.org/W2121863487","https://openalex.org/W2127842795","https://openalex.org/W2136919374","https://openalex.org/W2150905308","https://openalex.org/W2151268438","https://openalex.org/W2199385070","https://openalex.org/W2216069195","https://openalex.org/W2330024298","https://openalex.org/W2475089067","https://openalex.org/W2508573783","https://openalex.org/W2594890764","https://openalex.org/W2606733399","https://openalex.org/W2623431351","https://openalex.org/W2758442112","https://openalex.org/W2786614119","https://openalex.org/W2790380320","https://openalex.org/W2902922568","https://openalex.org/W2921294933","https://openalex.org/W2949644579","https://openalex.org/W2962903234","https://openalex.org/W2963407617","https://openalex.org/W2963627051","https://openalex.org/W2963689090","https://openalex.org/W2964251366","https://openalex.org/W2970971581","https://openalex.org/W2971634072","https://openalex.org/W2994648961","https://openalex.org/W3037942570","https://openalex.org/W3038190622","https://openalex.org/W3084824572","https://openalex.org/W3103262232","https://openalex.org/W3121938597","https://openalex.org/W3127589652","https://openalex.org/W3151813343","https://openalex.org/W4234761190","https://openalex.org/W4300577719"],"related_works":["https://openalex.org/W4287644830","https://openalex.org/W4302081789","https://openalex.org/W4214835929","https://openalex.org/W2963864634","https://openalex.org/W2890426576","https://openalex.org/W2588094729","https://openalex.org/W4226086326","https://openalex.org/W1991899349","https://openalex.org/W4315471847","https://openalex.org/W3119014777"],"abstract_inverted_index":{"Many":[0],"real-world":[1],"multi-agent":[2,64],"interactions":[3,65],"consider":[4,71],"multiple":[5],"distinct":[6],"criteria,":[7],"i.e.":[8],"the":[9,16,39,45,52,56,82,124,131,151],"payoffs":[10],"are":[11,159,174],"multi-objective":[12,18,63,73],"in":[13,44,103,137,154],"nature.":[14],"However,":[15],"same":[17],"payoff":[19],"vector":[20],"may":[21],"lead":[22],"to":[23,36,96,185,188],"different":[24,139],"utilities":[25],"for":[26,33],"each":[27],"participant.":[28],"Therefore,":[29],"it":[30],"is":[31],"essential":[32],"an":[34],"agent":[35],"learn":[37],"about":[38],"behaviour":[40],"of":[41,55,58,100,126],"other":[42],"agents":[43,168,184],"system.":[46],"In":[47],"this":[48,104,155],"work,":[49],"we":[50,70],"present":[51],"first":[53],"study":[54],"effects":[57],"such":[59],"opponent":[60,111,117,143,161,178],"modelling":[61,147,162,182],"on":[62,167],"with":[66,77,107,116],"nonlinear":[67,78],"utilities.":[68],"Specifically,":[69],"two-player":[72],"normal":[74],"form":[75],"games":[76],"utility":[79],"functions":[80],"under":[81],"scalarised":[83],"expected":[84],"returns":[85],"optimisation":[86],"criterion.":[87],"We":[88],"contribute":[89],"novel":[90],"actor-critic":[91],"and":[92,114,146,181],"policy":[93,112,128],"gradient":[94],"formulations":[95],"allow":[97],"reinforcement":[98],"learning":[99,115,118,121,133,144,152,179],"mixed":[101],"strategies":[102],"setting,":[105],"along":[106],"extensions":[108],"that":[109,142,169,191],"incorporate":[110],"reconstruction":[113],"awareness":[119,145,180],"(i.e.":[120],"while":[122],"considering":[123],"impact":[125],"one\u2019s":[127],"when":[129],"anticipating":[130],"opponent\u2019s":[132],"step).":[134],"Empirical":[135],"results":[136],"five":[138],"MONFGs":[140],"demonstrate":[141],"can":[148,163],"drastically":[149],"alter":[150],"dynamics":[153],"setting.":[156],"When":[157,172],"equilibria":[158],"present,":[160],"confer":[164],"significant":[165],"benefits":[166],"implement":[170],"it.":[171],"there":[173],"no":[175],"Nash":[176],"equilibria,":[177],"allows":[183],"still":[186],"converge":[187],"meaningful":[189],"solutions":[190],"approximate":[192],"equilibria.":[193]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":7}],"updated_date":"2026-05-14T06:16:12.342656","created_date":"2025-10-10T00:00:00"}
