{"id":"https://openalex.org/W4411086320","doi":"https://doi.org/10.1109/lra.2025.3577527","title":"LAMARL: LLM-Aided Multi-Agent Reinforcement Learning for Cooperative Policy Generation","display_name":"LAMARL: LLM-Aided Multi-Agent Reinforcement Learning for Cooperative Policy Generation","publication_year":2025,"publication_date":"2025-06-06","ids":{"openalex":"https://openalex.org/W4411086320","doi":"https://doi.org/10.1109/lra.2025.3577527"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2025.3577527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2025.3577527","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089159183","display_name":"Guobin Zhu","orcid":"https://orcid.org/0009-0002-2643-5304"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guobin Zhu","raw_affiliation_strings":["School of Automation Science and Electrical Engineering, Beihang University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-2643-5304","affiliations":[{"raw_affiliation_string":"School of Automation Science and Electrical Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101510785","display_name":"Rui Zhou","orcid":"https://orcid.org/0000-0003-2476-1130"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Zhou","raw_affiliation_strings":["School of Automation Science and Electrical Engineering, Beihang University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Automation Science and Electrical Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049044867","display_name":"Wenkang Ji","orcid":null},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenkang Ji","raw_affiliation_strings":["Department of Artificial Intelligence, WINDY Lab, Westlake University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, WINDY Lab, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052346042","display_name":"Shiyu Zhao","orcid":"https://orcid.org/0000-0003-3098-8059"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiyu Zhao","raw_affiliation_strings":["Department of Artificial Intelligence, WINDY Lab, Westlake University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-3098-8059","affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, WINDY Lab, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5089159183"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":11.2369,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.98091952,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"10","issue":"7","first_page":"7476","last_page":"7483"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6987210512161255},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5101194977760315},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.46777358651161194},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3040519952774048},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.179571270942688},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.044598400592803955}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6987210512161255},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5101194977760315},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.46777358651161194},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3040519952774048},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.179571270942688},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.044598400592803955}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2025.3577527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2025.3577527","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G716867869","display_name":null,"funder_award_id":"62473017","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1981826826","https://openalex.org/W2567990716","https://openalex.org/W2749468379","https://openalex.org/W2962992878","https://openalex.org/W3027397596","https://openalex.org/W3046936059","https://openalex.org/W3142049818","https://openalex.org/W3197594072","https://openalex.org/W3211583675","https://openalex.org/W3214437493","https://openalex.org/W4225712251","https://openalex.org/W4362515116","https://openalex.org/W4380576974","https://openalex.org/W4383097638","https://openalex.org/W4394661742","https://openalex.org/W4396877948","https://openalex.org/W4401023556","https://openalex.org/W4401415431","https://openalex.org/W4401416363","https://openalex.org/W4401553778","https://openalex.org/W4401607584","https://openalex.org/W4402716424","https://openalex.org/W4404534210","https://openalex.org/W4405516940","https://openalex.org/W4406665965","https://openalex.org/W6634004297","https://openalex.org/W6735033012","https://openalex.org/W6738796088","https://openalex.org/W6749859622","https://openalex.org/W6771968188","https://openalex.org/W6809646742","https://openalex.org/W6848676007","https://openalex.org/W6851775633","https://openalex.org/W6857151620","https://openalex.org/W6864532551","https://openalex.org/W6870362625"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856"],"abstract_inverted_index":{"Although":[0],"Multi-Agent":[1],"Reinforcement":[2],"Learning":[3],"(MARL)":[4],"is":[5,88],"effective":[6],"for":[7],"complex":[8],"multi-robot":[9,37],"tasks,":[10],"it":[11],"suffers":[12],"from":[13],"low":[14],"sample":[15,58,125],"efficiency":[16,59,126],"and":[17,82,108,132,143,154],"requires":[18],"iterative":[19],"manual":[20,62],"reward":[21,83],"tuning.":[22],"Large":[23],"Language":[24],"Models":[25],"(LLMs)":[26],"have":[27],"shown":[28],"promise":[29],"in":[30,36],"single-robot":[31],"settings,":[32],"but":[33],"their":[34],"application":[35],"systems":[38],"remains":[39],"largely":[40],"unexplored.":[41],"This":[42],"paper":[43],"introduces":[44],"a":[45,102],"novel":[46],"LLM-Aided":[47],"MARL":[48,53],"(LAMARL)":[49],"approach,":[50],"which":[51,90],"integrates":[52],"with":[54],"LLMs,":[55],"significantly":[56],"enhancing":[57],"without":[60],"requiring":[61],"design.":[63],"LAMARL":[64],"consists":[65],"of":[66,79,115,130],"two":[67],"modules:":[68],"the":[69,77,92,112,121],"first":[70],"module":[71,87],"leverages":[72],"LLMs":[73],"to":[74,95],"fully":[75],"automate":[76],"generation":[78],"prior":[80,122],"policy":[81,98,123],"functions.":[84],"The":[85],"second":[86],"MARL,":[89],"uses":[91],"generated":[93],"functions":[94],"guide":[96],"robot":[97],"training":[99],"effectively.":[100],"On":[101],"shape":[103],"assembly":[104],"benchmark,":[105],"both":[106],"simulation":[107],"real-world":[109],"experiments":[110],"demonstrate":[111],"unique":[113],"advantages":[114],"LAMARL.":[116],"Ablation":[117],"studies":[118],"show":[119],"that":[120],"improves":[124],"by":[127,151],"an":[128],"average":[129],"185.9%":[131],"enhances":[133],"task":[134],"completion,":[135],"while":[136],"structured":[137],"prompts":[138],"based":[139],"on":[140],"Chain-of-Thought":[141],"(CoT)":[142],"basic":[144],"APIs":[145],"improve":[146],"LLM":[147],"output":[148],"success":[149],"rates":[150],"28.5%-67.5%.":[152],"Videos":[153],"code":[155],"are":[156],"available":[157],"at":[158],"<uri":[159],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[160],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://windylab.github.io/LAMARL/</uri>":[161]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":3}],"updated_date":"2026-05-27T09:02:27.158192","created_date":"2025-10-10T00:00:00"}
