{"id":"https://openalex.org/W7138932594","doi":"https://doi.org/10.3390/a19030230","title":"A Multi-Agent Advisory Board Reinforcement Learning Framework for Adaptive Cooperative Control","display_name":"A Multi-Agent Advisory Board Reinforcement Learning Framework for Adaptive Cooperative Control","publication_year":2026,"publication_date":"2026-03-18","ids":{"openalex":"https://openalex.org/W7138932594","doi":"https://doi.org/10.3390/a19030230"},"language":"en","primary_location":{"id":"doi:10.3390/a19030230","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a19030230","pdf_url":"https://www.mdpi.com/1999-4893/19/3/230/pdf?version=1773837046","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/19/3/230/pdf?version=1773837046","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124452824","display_name":"Onur Osman","orcid":null},"institutions":[{"id":"https://openalex.org/I48912391","display_name":"Istanbul Technical University","ror":"https://ror.org/059636586","country_code":"TR","type":"education","lineage":["https://openalex.org/I48912391"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Onur Osman","raw_affiliation_strings":["Department of Electric Electronics Engineering, \u0130stanbul Topkapi University, 34087 Istanbul, T\u00fcrkiye"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electric Electronics Engineering, \u0130stanbul Topkapi University, 34087 Istanbul, T\u00fcrkiye","institution_ids":["https://openalex.org/I48912391"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036944001","display_name":"Tolga Kudret Karaca","orcid":null},"institutions":[{"id":"https://openalex.org/I67581229","display_name":"Istanbul University","ror":"https://ror.org/03a5qrr21","country_code":"TR","type":"education","lineage":["https://openalex.org/I67581229"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Tolga Kudret Karaca","raw_affiliation_strings":["Department of Industrial Engineering, \u0130stanbul Topkapi University, 34087 Istanbul, T\u00fcrkiye"],"raw_orcid":"https://orcid.org/0000-0001-5562-6367","affiliations":[{"raw_affiliation_string":"Department of Industrial Engineering, \u0130stanbul Topkapi University, 34087 Istanbul, T\u00fcrkiye","institution_ids":["https://openalex.org/I67581229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022659480","display_name":"Bahar Yal\u00e7\u0131n Kavu\u015f","orcid":"https://orcid.org/0000-0001-5295-1631"},"institutions":[{"id":"https://openalex.org/I250383648","display_name":"Izmir K\u00e2tip \u00c7elebi University","ror":"https://ror.org/024nx4843","country_code":"TR","type":"education","lineage":["https://openalex.org/I250383648"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Bahar Yalcin Kavus","raw_affiliation_strings":["Quality Coordination Office, \u0130zmir Katip \u00c7elebi University, 35620 Izmir, T\u00fcrkiye"],"raw_orcid":"https://orcid.org/0000-0001-5295-1631","affiliations":[{"raw_affiliation_string":"Quality Coordination Office, \u0130zmir Katip \u00c7elebi University, 35620 Izmir, T\u00fcrkiye","institution_ids":["https://openalex.org/I250383648"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124312340","display_name":"Gokalp Tulum","orcid":null},"institutions":[{"id":"https://openalex.org/I48912391","display_name":"Istanbul Technical University","ror":"https://ror.org/059636586","country_code":"TR","type":"education","lineage":["https://openalex.org/I48912391"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Gokalp Tulum","raw_affiliation_strings":["Department of Electric Electronics Engineering, \u0130stanbul Topkapi University, 34087 Istanbul, T\u00fcrkiye"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electric Electronics Engineering, \u0130stanbul Topkapi University, 34087 Istanbul, T\u00fcrkiye","institution_ids":["https://openalex.org/I48912391"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122285633","display_name":"Sajjad Nematzadeh","orcid":null},"institutions":[{"id":"https://openalex.org/I67581229","display_name":"Istanbul University","ror":"https://ror.org/03a5qrr21","country_code":"TR","type":"education","lineage":["https://openalex.org/I67581229"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Sajjad Nematzadeh","raw_affiliation_strings":["Department of Computer Engineering, \u0130stanbul Topkapi University, 34087 Istanbul, T\u00fcrkiye"],"raw_orcid":"https://orcid.org/0000-0001-5064-2181","affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, \u0130stanbul Topkapi University, 34087 Istanbul, T\u00fcrkiye","institution_ids":["https://openalex.org/I67581229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5036944001"],"corresponding_institution_ids":["https://openalex.org/I67581229"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.43323513,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"19","issue":"3","first_page":"230","last_page":"230"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.90829998254776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.90829998254776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.05999999865889549,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.0020000000949949026,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8536999821662903},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5906999707221985},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4887999892234802},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4846000075340271},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4787999987602234},{"id":"https://openalex.org/keywords/advisory-committee","display_name":"Advisory committee","score":0.45669999718666077},{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.42829999327659607}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8536999821662903},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6517000198364258},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5906999707221985},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4887999892234802},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4846000075340271},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4787999987602234},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47429999709129333},{"id":"https://openalex.org/C3017489713","wikidata":"https://www.wikidata.org/wiki/Q4686866","display_name":"Advisory committee","level":2,"score":0.45669999718666077},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.42829999327659607},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.42489999532699585},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4171999990940094},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40549999475479126},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4011000096797943},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3824999928474426},{"id":"https://openalex.org/C37228920","wikidata":"https://www.wikidata.org/wiki/Q1307600","display_name":"Experiential learning","level":2,"score":0.3783999979496002},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.3734999895095825},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.328000009059906},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.2928999960422516},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/a19030230","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a19030230","pdf_url":"https://www.mdpi.com/1999-4893/19/3/230/pdf?version=1773837046","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:23c383f7c03a4edea26040f0dc472e43","is_oa":true,"landing_page_url":"https://doaj.org/article/23c383f7c03a4edea26040f0dc472e43","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 19, Iss 3, p 230 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/a19030230","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a19030230","pdf_url":"https://www.mdpi.com/1999-4893/19/3/230/pdf?version=1773837046","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138932594.pdf","grobid_xml":"https://content.openalex.org/works/W7138932594.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2746553466","https://openalex.org/W2967578086","https://openalex.org/W3038822267","https://openalex.org/W3120270228","https://openalex.org/W3124201714","https://openalex.org/W3139866229","https://openalex.org/W3156364506","https://openalex.org/W3199069166","https://openalex.org/W3204117917","https://openalex.org/W4292707814","https://openalex.org/W4301373661","https://openalex.org/W4306830960","https://openalex.org/W4311719973","https://openalex.org/W4312548104","https://openalex.org/W4388996689","https://openalex.org/W4390401418","https://openalex.org/W4391979614","https://openalex.org/W4392255641","https://openalex.org/W4405933903","https://openalex.org/W4406281368","https://openalex.org/W7127271357"],"related_works":[],"abstract_inverted_index":{"This":[0,45],"study":[1],"proposes":[2],"Advisory":[3],"Board":[4],"Reinforcement":[5],"Learning":[6],"(AdvB-RL),":[7],"a":[8,28,41,84,190],"cooperative":[9,201],"reinforcement-learning":[10],"framework":[11,60],"that":[12,35,141],"integrates":[13],"multiple":[14],"advisory":[15,33,74,146,168],"neural":[16],"networks":[17,34],"to":[18,37,184,195],"guide":[19],"policy":[20],"optimization.":[21],"Unlike":[22],"conventional":[23],"single-agent":[24],"architectures,":[25],"AdvB-RL":[26,188],"maintains":[27],"set":[29],"of":[30,77,145],"independently":[31],"trained":[32],"contribute":[36],"action":[38],"selection":[39],"through":[40],"dynamic":[42],"aggregation":[43],"mechanism.":[44],"design":[46],"preserves":[47],"diverse":[48],"experiential":[49],"knowledge":[50],"while":[51,174],"improving":[52],"learning":[53,198],"stability":[54],"and":[55,71,80,116,152,192],"the":[56,135,143,156,160,164,167],"exploration\u2013exploitation":[57],"balance.":[58],"The":[59,90],"is":[61],"evaluated":[62],"on":[63,99,110,120],"three":[64,136],"benchmark":[65],"control":[66],"tasks,":[67],"namely":[68],"LunarLander-v2,":[69],"CartPole-v1,":[70],"MountainCar-v0,":[72],"using":[73],"board":[75,169],"sizes":[76],"1,":[78],"5,":[79],"10":[81,93,178],"members":[82,147],"against":[83],"Double":[85],"Deep":[86],"Q-Network":[87],"(DDQN)":[88],"baseline.":[89],"best-performing":[91],"configuration,":[92],"AdvB,":[94],"achieved":[95],"270.02":[96],"\u00b1":[97,103,108,114,118,124],"24.74":[98],"LunarLander-v2":[100],"versus":[101,112,122],"227.92":[102],"86.02":[104],"for":[105,199],"DDQN,":[106],"497.79":[107],"5.18":[109],"CartPole-v1":[111],"304.37":[113],"144.04,":[115],"\u2212103.16":[117],"15.46":[119],"MountainCar-v0":[121],"\u2212130.71":[123],"31.64,":[125],"indicating":[126],"higher":[127],"returns":[128,182],"together":[129],"with":[130,155],"markedly":[131],"lower":[132],"variability.":[133],"Across":[134],"environments,":[137],"these":[138],"results":[139],"show":[140,180],"increasing":[142],"number":[144],"improves":[148],"both":[149],"reward":[150],"consistency":[151],"overall":[153],"robustness,":[154],"10-member":[157],"setting":[158],"providing":[159],"strongest":[161],"performance.":[162],"Within":[163],"tested":[165],"configurations,":[166],"mechanism":[170],"remains":[171],"computationally":[172],"feasible,":[173],"preliminary":[175],"experiments":[176],"beyond":[177],"advisors":[179],"diminishing":[181],"relative":[183],"added":[185],"complexity.":[186],"Overall,":[187],"provides":[189],"robust":[191],"modular":[193],"alternative":[194],"single-policy":[196],"reinforcement":[197],"adaptive":[200],"control.":[202]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-20T00:00:00"}
