{"id":"https://openalex.org/W4410737611","doi":"https://doi.org/10.1109/lcsys.2025.3573938","title":"Swarm-Level Task Learning via Generalized Moments in Reinforcement Learning With Reward Machines","display_name":"Swarm-Level Task Learning via Generalized Moments in Reinforcement Learning With Reward Machines","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4410737611","doi":"https://doi.org/10.1109/lcsys.2025.3573938"},"language":"en","primary_location":{"id":"doi:10.1109/lcsys.2025.3573938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lcsys.2025.3573938","pdf_url":null,"source":{"id":"https://openalex.org/S4306422535","display_name":"IEEE Control Systems Letters","issn_l":"2475-1456","issn":["2475-1456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Control Systems Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093925782","display_name":"Shayan Meshkat Alsadat","orcid":"https://orcid.org/0009-0006-4301-1430"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shayan Meshkat Alsadat","raw_affiliation_strings":["School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA","School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, Arizona, USA"],"affiliations":[{"raw_affiliation_string":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA","institution_ids":["https://openalex.org/I55732556"]},{"raw_affiliation_string":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, Arizona, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Vivek Sunil Kulkarni","orcid":"https://orcid.org/0009-0007-5096-0199"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vivek Sunil Kulkarni","raw_affiliation_strings":["School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA","School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, Arizona, USA"],"affiliations":[{"raw_affiliation_string":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA","institution_ids":["https://openalex.org/I55732556"]},{"raw_affiliation_string":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, Arizona, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013789785","display_name":"Zhe Xu","orcid":"https://orcid.org/0000-0002-0440-0912"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhe Xu","raw_affiliation_strings":["School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA","School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, Arizona, USA"],"affiliations":[{"raw_affiliation_string":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA","institution_ids":["https://openalex.org/I55732556"]},{"raw_affiliation_string":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, Arizona, USA","institution_ids":["https://openalex.org/I55732556"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5093925782"],"corresponding_institution_ids":["https://openalex.org/I55732556"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0524451,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"9","issue":null,"first_page":"450","last_page":"455"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9574000239372253,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9574000239372253,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8319052457809448},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6283733248710632},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5702792406082153},{"id":"https://openalex.org/keywords/swarm-behaviour","display_name":"Swarm behaviour","score":0.5442266464233398},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49887681007385254},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4465385675430298},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37785428762435913},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.23468589782714844},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1772143840789795},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.0610429048538208}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8319052457809448},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6283733248710632},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5702792406082153},{"id":"https://openalex.org/C181335050","wikidata":"https://www.wikidata.org/wiki/Q14915018","display_name":"Swarm behaviour","level":2,"score":0.5442266464233398},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49887681007385254},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4465385675430298},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37785428762435913},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.23468589782714844},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1772143840789795},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0610429048538208},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lcsys.2025.3573938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lcsys.2025.3573938","pdf_url":null,"source":{"id":"https://openalex.org/S4306422535","display_name":"IEEE Control Systems Letters","issn_l":"2475-1456","issn":["2475-1456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Control Systems Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.49000000953674316,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G5840370012","display_name":null,"funder_award_id":"N00014-23-1-2505","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2117905067","https://openalex.org/W2544041682","https://openalex.org/W2746553466","https://openalex.org/W2883532348","https://openalex.org/W2955404434","https://openalex.org/W2972500268","https://openalex.org/W3173218700","https://openalex.org/W3176621594","https://openalex.org/W4285613368","https://openalex.org/W4319866032","https://openalex.org/W4402264525","https://openalex.org/W6618135342","https://openalex.org/W6703271639","https://openalex.org/W6738796088","https://openalex.org/W6739516088","https://openalex.org/W6747941106","https://openalex.org/W6752298494","https://openalex.org/W6779901582","https://openalex.org/W6864459689"],"related_works":["https://openalex.org/W4288714711","https://openalex.org/W3200708550","https://openalex.org/W2736680465","https://openalex.org/W2771637876","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W4294093918","https://openalex.org/W4240853094","https://openalex.org/W2810496283"],"abstract_inverted_index":{"Swarm-level":[0],"task":[1,38],"learning":[2,7,110,174],"provides":[3],"a":[4,18,46,115],"basis":[5],"for":[6],"complex":[8,175],"tasks":[9],"in":[10,66,155,173],"swarm":[11,47,92,176],"systems":[12],"using":[13],"swarm-level":[14],"features.":[15],"We":[16,31,85,127,150],"propose":[17],"method,":[19],"SwaRM-L,":[20],"to":[21,35,50,58,73,90,105,107,118,133,142],"learn":[22,51,80],"reward":[23,29,33],"machines":[24,34],"(RMs)":[25],"that":[26,54,129],"encode":[27],"non-Markovian":[28],"functions.":[30],"use":[32,86,114],"specify":[36],"the":[37,63,74,96,108,143,148,168],"and":[39,78,94,122,137],"its":[40],"temporal":[41],"structure.":[42],"Our":[43,165],"approach":[44],"enables":[45],"of":[48,170],"agents":[49,113,161],"an":[52,102,134,139],"RM":[53,61,77,97,141,146],"eventually":[55],"becomes":[56],"equivalent":[57,140],"ground":[59,75,144],"truth":[60,76,145],"(i.e.,":[62],"specified":[64],"task)":[65],"this":[67],"environment":[68,83],"(agents":[69],"have":[70],"no":[71],"access":[72],"only":[79],"it":[81],"through":[82],"interaction).":[84],"generalized":[87],"moments":[88],"(GMs)":[89],"characterize":[91],"features":[93],"estimate":[95],"state.":[98],"Each":[99],"agent":[100],"maintains":[101],"estimated":[103,125],"GM":[104],"contribute":[106],"collective":[109],"process.":[111],"The":[112],"gossip":[116],"algorithm":[117],"communicate":[119],"with":[120,162],"neighbors":[121],"update":[123],"their":[124],"GMs.":[126],"prove":[128],"our":[130,152,171],"method":[131,154,172],"converges":[132],"optimal":[135],"policy":[136],"learns":[138],"within":[147],"environment.":[149],"evaluate":[151],"proposed":[153],"three":[156],"case":[157],"studies":[158],"involving":[159],"forty":[160],"homogeneous":[163],"dynamics.":[164],"results":[166],"demonstrate":[167],"effectiveness":[169],"behaviors.":[177]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
