{"id":"https://openalex.org/W3045695591","doi":"https://doi.org/10.23919/acc45564.2020.9147329","title":"Hierarchical Control of Multi-Agent Systems using Online Reinforcement Learning","display_name":"Hierarchical Control of Multi-Agent Systems using Online Reinforcement Learning","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3045695591","doi":"https://doi.org/10.23919/acc45564.2020.9147329","mag":"3045695591"},"language":"en","primary_location":{"id":"doi:10.23919/acc45564.2020.9147329","is_oa":false,"landing_page_url":"https://doi.org/10.23919/acc45564.2020.9147329","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 American Control Conference (ACC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040513070","display_name":"He Bai","orcid":"https://orcid.org/0000-0002-4247-0698"},"institutions":[{"id":"https://openalex.org/I115475287","display_name":"Oklahoma State University","ror":"https://ror.org/01g9vbr38","country_code":"US","type":"education","lineage":["https://openalex.org/I115475287"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"He Bai","raw_affiliation_strings":["Oklahoma State University, Stillwater, OK, USA"],"affiliations":[{"raw_affiliation_string":"Oklahoma State University, Stillwater, OK, USA","institution_ids":["https://openalex.org/I115475287"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054187846","display_name":"Jemin George","orcid":"https://orcid.org/0000-0001-8417-5411"},"institutions":[{"id":"https://openalex.org/I166416128","display_name":"DEVCOM Army Research Laboratory","ror":"https://ror.org/011hc8f90","country_code":"US","type":"government","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I166416128","https://openalex.org/I2802705668","https://openalex.org/I4210154437"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jemin George","raw_affiliation_strings":["U.S. Army Research Laboratory, Adelphi, MD, US"],"affiliations":[{"raw_affiliation_string":"U.S. Army Research Laboratory, Adelphi, MD, US","institution_ids":["https://openalex.org/I166416128"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055360778","display_name":"Aranya Chakrabortty","orcid":"https://orcid.org/0000-0002-3474-8215"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aranya Chakrabortty","raw_affiliation_strings":["North Carolina State University, Raleigh, NC, USA"],"affiliations":[{"raw_affiliation_string":"North Carolina State University, Raleigh, NC, USA","institution_ids":["https://openalex.org/I137902535"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5040513070"],"corresponding_institution_ids":["https://openalex.org/I115475287"],"apc_list":null,"apc_paid":null,"fwci":0.4414,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.61728478,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14083","display_name":"Extremum Seeking Control Systems","score":0.9775999784469604,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14083","display_name":"Extremum Seeking Control Systems","score":0.9775999784469604,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9508000016212463,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9483000040054321,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8514955043792725},{"id":"https://openalex.org/keywords/linear-quadratic-regulator","display_name":"Linear-quadratic regulator","score":0.8226857781410217},{"id":"https://openalex.org/keywords/algebraic-riccati-equation","display_name":"Algebraic Riccati equation","score":0.6136435866355896},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.5871543288230896},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.5516383647918701},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5402533411979675},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.5098382830619812},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.5096189975738525},{"id":"https://openalex.org/keywords/linear-quadratic-gaussian-control","display_name":"Linear-quadratic-Gaussian control","score":0.49712827801704407},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4850298762321472},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.4754742681980133},{"id":"https://openalex.org/keywords/riccati-equation","display_name":"Riccati equation","score":0.45989707112312317},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4251776933670044},{"id":"https://openalex.org/keywords/decentralised-system","display_name":"Decentralised system","score":0.41273725032806396},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.30384719371795654},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.27699506282806396},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2724097967147827}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8514955043792725},{"id":"https://openalex.org/C98779006","wikidata":"https://www.wikidata.org/wiki/Q2520550","display_name":"Linear-quadratic regulator","level":3,"score":0.8226857781410217},{"id":"https://openalex.org/C13847129","wikidata":"https://www.wikidata.org/wiki/Q4723989","display_name":"Algebraic Riccati equation","level":4,"score":0.6136435866355896},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.5871543288230896},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.5516383647918701},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5402533411979675},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.5098382830619812},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.5096189975738525},{"id":"https://openalex.org/C204495892","wikidata":"https://www.wikidata.org/wiki/Q1798304","display_name":"Linear-quadratic-Gaussian control","level":3,"score":0.49712827801704407},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4850298762321472},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.4754742681980133},{"id":"https://openalex.org/C45473103","wikidata":"https://www.wikidata.org/wiki/Q851503","display_name":"Riccati equation","level":3,"score":0.45989707112312317},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4251776933670044},{"id":"https://openalex.org/C205875254","wikidata":"https://www.wikidata.org/wiki/Q17156857","display_name":"Decentralised system","level":3,"score":0.41273725032806396},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.30384719371795654},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27699506282806396},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2724097967147827},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78045399","wikidata":"https://www.wikidata.org/wiki/Q11214","display_name":"Differential equation","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/acc45564.2020.9147329","is_oa":false,"landing_page_url":"https://doi.org/10.23919/acc45564.2020.9147329","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 American Control Conference (ACC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1891861977","https://openalex.org/W1969742965","https://openalex.org/W1988251927","https://openalex.org/W2051466922","https://openalex.org/W2165726932","https://openalex.org/W2461556483","https://openalex.org/W2564717627","https://openalex.org/W2784146092","https://openalex.org/W2913981879","https://openalex.org/W4253985045","https://openalex.org/W4301886962","https://openalex.org/W6719122095"],"related_works":["https://openalex.org/W2005019312","https://openalex.org/W2547959172","https://openalex.org/W2018296362","https://openalex.org/W2727200542","https://openalex.org/W2726381554","https://openalex.org/W2002357820","https://openalex.org/W197349956","https://openalex.org/W2997206142","https://openalex.org/W2027962246","https://openalex.org/W2804494800"],"abstract_inverted_index":{"We":[0],"propose":[1],"a":[2,55,98,114,167],"new":[3],"reinforcement":[4,115,175],"learning":[5,116,170],"based":[6,135],"approach":[7],"to":[8,53,72,104,130,161,173],"designing":[9],"hierarchical":[10],"linear":[11,18],"quadratic":[12],"regulator":[13],"(LQR)":[14],"controllers":[15,134],"for":[16],"heterogeneous":[17],"multi-agent":[19],"systems":[20],"with":[21],"unknown":[22],"state-space":[23],"models":[24,61],"and":[25,40,126],"separated":[26],"control":[27,43],"objectives.":[28,48],"The":[29,49,64],"separation":[30],"arises":[31],"from":[32],"grouping":[33],"the":[34,42,68,78,83,90,94,123,127,132,137,142,155,163],"agents":[35],"into":[36],"multiple":[37],"non-overlapping":[38],"groups,":[39],"defining":[41],"goal":[44],"as":[45],"two":[46],"distinct":[47],"first":[50,143],"objective":[51],"aims":[52],"minimize":[54,73],"group-wise":[56],"block-decentralized":[57,146],"LQR":[58,75,95],"function":[59,76],"that":[60,100,118],"group-level":[62],"mission.":[63],"second":[65,156],"objective,":[66],"on":[67,136],"other":[69],"hand,":[70],"tries":[71],"an":[74],"between":[77],"average":[79,128],"states":[80,125,129],"(centroids)":[81],"of":[82,93,122],"groups.":[84],"Exploiting":[85],"this":[86],"separation,":[87],"we":[88,112],"redefine":[89],"weighting":[91],"matrices":[92],"functions":[96],"in":[97,152],"way":[99],"they":[101],"allow":[102],"us":[103],"decouple":[105],"their":[106],"respective":[107,133],"algebraic":[108],"Riccati":[109,139],"equations.":[110,140],"Thereafter,":[111],"develop":[113],"strategy":[117],"uses":[119],"online":[120],"measurements":[121],"agent":[124],"learn":[131],"approximate":[138],"Since":[141],"controller":[144,157],"is":[145,158],"and,":[147],"therefore,":[148],"can":[149],"be":[150],"learned":[151],"parallel,":[153],"while":[154],"reduced-dimensional":[159],"due":[160],"averaging,":[162],"overall":[164],"design":[165],"enjoys":[166],"significantly":[168],"reduced":[169],"time":[171],"compared":[172],"centralized":[174],"learning.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
