{"id":"https://openalex.org/W4415427996","doi":"https://doi.org/10.3233/faia251253","title":"Concurrent Multiagent Reinforcement Learning with Reward Machines","display_name":"Concurrent Multiagent Reinforcement Learning with Reward Machines","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415427996","doi":"https://doi.org/10.3233/faia251253"},"language":"en","primary_location":{"id":"doi:10.3233/faia251253","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251253","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251253","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092110114","display_name":"Alessandro Trapasso","orcid":"https://orcid.org/0000-0001-5431-6607"},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Alessandro Trapasso","raw_affiliation_strings":["Sapienza University of Rome, Rome, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sapienza University of Rome, Rome, Italy","institution_ids":["https://openalex.org/I861853513"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009429743","display_name":"Anders J\u00f6nsson","orcid":"https://orcid.org/0000-0001-6826-0130"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Anders Jonsson","raw_affiliation_strings":["Universitat Pompeu Fabra, Barcelona, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra, Barcelona, Spain","institution_ids":["https://openalex.org/I170486558"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5092110114"],"corresponding_institution_ids":["https://openalex.org/I861853513"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.47341448,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8151000142097473,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8151000142097473,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8199999928474426},{"id":"https://openalex.org/keywords/synchronizing","display_name":"Synchronizing","score":0.7145000100135803},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.5756999850273132},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.517799973487854},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.44929999113082886},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4219000041484833},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.39430001378059387},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.3887999951839447}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8199999928474426},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8050000071525574},{"id":"https://openalex.org/C162932704","wikidata":"https://www.wikidata.org/wiki/Q1058791","display_name":"Synchronizing","level":3,"score":0.7145000100135803},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.5756999850273132},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5611000061035156},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.517799973487854},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.44929999113082886},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4219000041484833},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.39430001378059387},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3887999951839447},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.37929999828338623},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.37369999289512634},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3325999975204468},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.31679999828338623},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.28859999775886536},{"id":"https://openalex.org/C84511453","wikidata":"https://www.wikidata.org/wiki/Q2914952","display_name":"Concurrency control","level":3,"score":0.2773999869823456},{"id":"https://openalex.org/C2775941552","wikidata":"https://www.wikidata.org/wiki/Q25212305","display_name":"Isolation (microbiology)","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.258899986743927}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3233/faia251253","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251253","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},{"id":"pmh:oai:iris.uniroma1.it:11573/1756085","is_oa":false,"landing_page_url":"https://hdl.handle.net/11573/1756085","pdf_url":null,"source":{"id":"https://openalex.org/S4377196107","display_name":"IRIS Research product catalog (Sapienza University of Rome)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.3233/faia251253","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251253","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Coordinating":[0],"and":[1,16,60,67,107,115,129],"synchronizing":[2],"multiple":[3],"agents":[4,64,86,123],"in":[5,38,72,79,84,110],"reinforcement":[6,40],"learning":[7,41],"(RL)":[8],"presents":[9],"significant":[10],"challenges,":[11],"particularly":[12],"when":[13],"concurrent":[14,120],"actions":[15,90],"shared":[17],"objectives":[18],"are":[19],"required.":[20],"We":[21,75],"propose":[22],"a":[23,80,113],"novel":[24],"framework":[25],"that":[26],"integrates":[27],"Reward":[28],"Machines":[29],"(RMs)":[30],"with":[31],"Partial-Order":[32],"Planning":[33],"(POP)":[34],"to":[35,65,88,118,124],"enhance":[36],"coordination":[37],"multiagent":[39,82],"(MARL).":[42],"By":[43],"transforming":[44],"high-level":[45],"POP":[46],"strategies":[47],"into":[48],"individual":[49],"RMs":[50,111],"for":[51],"each":[52],"agent,":[53],"our":[54,77],"approach":[55,78],"explicitly":[56],"captures":[57],"action":[58,105],"dependencies":[59,106],"concurrency":[61],"requirements,":[62],"enabling":[63,122],"learn":[66],"execute":[68],"coordinated":[69],"plans":[70],"effectively":[71],"complex":[73],"environments.":[74],"validate":[76],"grid-based":[81],"domain":[83],"which":[85],"have":[87],"synchronize":[89],"such":[91],"as":[92],"jointly":[93],"accessing":[94],"limited":[95],"pathways":[96],"or":[97],"collaboratively":[98],"manipulating":[99],"objects.":[100],"The":[101],"explicit":[102],"representation":[103],"of":[104],"synchronization":[108],"points":[109],"provides":[112],"scalable":[114],"flexible":[116],"mechanism":[117],"model":[119],"actions,":[121],"focus":[125],"on":[126],"relevant":[127],"tasks":[128],"reducing":[130],"exploration.":[131]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-24T00:00:00"}
