{"id":"https://openalex.org/W4402264525","doi":"https://doi.org/10.23919/acc60939.2024.10644549","title":"Distributed Reinforcement Learning For Swarm Systems With Reward Machines","display_name":"Distributed Reinforcement Learning For Swarm Systems With Reward Machines","publication_year":2024,"publication_date":"2024-07-10","ids":{"openalex":"https://openalex.org/W4402264525","doi":"https://doi.org/10.23919/acc60939.2024.10644549"},"language":"en","primary_location":{"id":"doi:10.23919/acc60939.2024.10644549","is_oa":false,"landing_page_url":"https://doi.org/10.23919/acc60939.2024.10644549","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 American Control Conference (ACC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093925782","display_name":"Shayan Meshkat Alsadat","orcid":"https://orcid.org/0009-0006-4301-1430"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shayan Meshkat Alsadat","raw_affiliation_strings":["School of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85281"],"affiliations":[{"raw_affiliation_string":"School of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85281","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065122789","display_name":"Nasim Baharisangari","orcid":"https://orcid.org/0000-0002-3984-8733"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nasim Baharisangari","raw_affiliation_strings":["School of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85281"],"affiliations":[{"raw_affiliation_string":"School of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85281","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092346339","display_name":"Yash Paliwal","orcid":null},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yash Paliwal","raw_affiliation_strings":["School of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85281"],"affiliations":[{"raw_affiliation_string":"School of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85281","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013789785","display_name":"Zhe Xu","orcid":"https://orcid.org/0000-0002-0440-0912"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhe Xu","raw_affiliation_strings":["School of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85281"],"affiliations":[{"raw_affiliation_string":"School of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85281","institution_ids":["https://openalex.org/I55732556"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5093925782"],"corresponding_institution_ids":["https://openalex.org/I55732556"],"apc_list":null,"apc_paid":null,"fwci":3.6632,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.93700625,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"33","last_page":"38"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11347","display_name":"Neural Networks Stability and Synchronization","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8160808086395264},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7228522896766663},{"id":"https://openalex.org/keywords/swarm-behaviour","display_name":"Swarm behaviour","score":0.49108386039733887},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.47053512930870056},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3840622007846832},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14256367087364197}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8160808086395264},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7228522896766663},{"id":"https://openalex.org/C181335050","wikidata":"https://www.wikidata.org/wiki/Q14915018","display_name":"Swarm behaviour","level":2,"score":0.49108386039733887},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.47053512930870056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3840622007846832},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14256367087364197},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/acc60939.2024.10644549","is_oa":false,"landing_page_url":"https://doi.org/10.23919/acc60939.2024.10644549","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 American Control Conference (ACC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W2030775133","https://openalex.org/W2060447811","https://openalex.org/W2093300195","https://openalex.org/W2117905067","https://openalex.org/W2152244994","https://openalex.org/W2202743478","https://openalex.org/W2544041682","https://openalex.org/W2883532348","https://openalex.org/W2955404434","https://openalex.org/W3038835214","https://openalex.org/W3176621594","https://openalex.org/W3186567150","https://openalex.org/W3209101326","https://openalex.org/W4253572110","https://openalex.org/W4285613368","https://openalex.org/W4299802797","https://openalex.org/W4378770523","https://openalex.org/W6738796088","https://openalex.org/W6747941106","https://openalex.org/W6752298494","https://openalex.org/W6779901582","https://openalex.org/W6852597288"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4288714711","https://openalex.org/W3200708550","https://openalex.org/W2736680465","https://openalex.org/W2771637876","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W4294093918"],"abstract_inverted_index":{"We":[0,27,72,108,196],"introduce":[1],"a":[2,169,175,191],"decentralized":[3],"reinforcement":[4],"learning":[5],"(RL)":[6],"algorithm":[7,76],"for":[8,77,113,165],"swarm":[9,35,44,93,122,131,146,170,186],"systems":[10],"where":[11],"reward":[12,25,55,66,106,149],"machines":[13,150],"(a":[14],"type":[15],"of":[16,42,53,64,91,100,104,120,129,142,156,171,187],"Mealy":[17],"machine)":[18],"are":[19],"used":[20],"to":[21,33,49,68,86,97],"encode":[22],"the":[23,29,40,43,51,54,61,65,74,80,92,105,114,117,121,125,130,140,154,185,202],"non-Markovian":[24],"functions.":[26],"use":[28,73],"generalized":[30],"moments":[31],"(GMs)":[32],"characterize":[34],"features.":[36],"Each":[37],"agent":[38,58],"estimates":[39],"GM":[41,90,119,128],"state":[45,52,63,103,123,207],"and":[46,124,133,177,180,193,210],"uses":[47,60],"it":[48],"estimate":[50,118],"machine.":[56,107],"The":[57],"then":[59],"estimated":[62,89,102],"machine":[67],"update":[69,87,99],"its":[70],"q-values.":[71],"gossip":[75],"communication":[78,85],"between":[79,116],"agents.":[81],"Agents":[82],"exploit":[83],"this":[84],"their":[88,101],"state,":[94,132],"which":[95],"leads":[96],"an":[98,110],"derive":[109],"upper":[111,136],"bound":[112],"error":[115],"ground":[126],"truth":[127],"using":[134],"that":[135,199],"bound;":[137],"we":[138,159],"prove":[139],"convergence":[141],"our":[143,157],"proposed":[144],"algorithm,":[145],"q-Iearning":[147,204],"with":[148],"(Swarm-QRM).":[151],"To":[152],"demonstrate":[153],"efficiency":[155],"approach,":[158],"present":[160],"two":[161],"case":[162],"studies":[163],"wherein,":[164],"Case":[166,182],"Study":[167,183],"1,":[168],"agents":[172,188],"will":[173,189],"perform":[174],"pickup":[176],"delivery":[178],"task,":[179],"in":[181,205],"2,":[184],"conduct":[190],"search":[192],"rescue":[194],"task.":[195],"also":[197],"show":[198],"Swarm-QRM":[200],"outperforms":[201],"baselines,":[203],"augmented":[206],"space":[208],"(QAS),":[209],"Double":[211],"Deep":[212],"Q-Network":[213],"(DDQN).":[214]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":2}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
