{"id":"https://openalex.org/W4414361030","doi":"https://doi.org/10.24963/ijcai.2025/12","title":"L2M2: A Hierarchical Framework Integrating Large Language Model and Multi-agent Reinforcement Learning","display_name":"L2M2: A Hierarchical Framework Integrating Large Language Model and Multi-agent Reinforcement Learning","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414361030","doi":"https://doi.org/10.24963/ijcai.2025/12"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/12","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/12","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ink.library.smu.edu.sg/sis_research/10852","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031040802","display_name":"Minghong Geng","orcid":"https://orcid.org/0000-0003-4041-0883"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Minghong Geng","raw_affiliation_strings":["Singapore Management University"],"affiliations":[{"raw_affiliation_string":"Singapore Management University","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081835375","display_name":"Shubham Pateria","orcid":"https://orcid.org/0000-0002-6507-4479"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shubham Pateria","raw_affiliation_strings":["Singapore Management University"],"affiliations":[{"raw_affiliation_string":"Singapore Management University","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062773086","display_name":"Budhitama Subagdja","orcid":"https://orcid.org/0000-0001-9774-0264"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Budhitama Subagdja","raw_affiliation_strings":["Singapore Management University"],"affiliations":[{"raw_affiliation_string":"Singapore Management University","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100717571","display_name":"Lin Li","orcid":"https://orcid.org/0000-0002-5446-6100"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin Li","raw_affiliation_strings":["MIGU Co., Ltd"],"affiliations":[{"raw_affiliation_string":"MIGU Co., Ltd","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100668752","display_name":"Xin Zhao","orcid":"https://orcid.org/0000-0003-2176-7537"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Zhao","raw_affiliation_strings":["Tsinghua University, Beijing National Research Center for Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing National Research Center for Information Science and Technology","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004167801","display_name":"Ah\u2010Hwee Tan","orcid":"https://orcid.org/0000-0003-0378-4069"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ah-Hwee Tan","raw_affiliation_strings":["Singapore Management University"],"affiliations":[{"raw_affiliation_string":"Singapore Management University","institution_ids":["https://openalex.org/I79891267"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5031040802"],"corresponding_institution_ids":["https://openalex.org/I79891267"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13228726,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"99","last_page":"107"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5372999906539917,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5372999906539917,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.515500009059906,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4611999988555908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7250000238418579},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.6298999786376953},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4196999967098236},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4108000099658966},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.3560999929904938},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.2985999882221222},{"id":"https://openalex.org/keywords/kernel-density-estimation","display_name":"Kernel density estimation","score":0.295199990272522}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7347000241279602},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7250000238418579},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.6298999786376953},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.546500027179718},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5415999889373779},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4196999967098236},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4108000099658966},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3425999879837036},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2985999882221222},{"id":"https://openalex.org/C71134354","wikidata":"https://www.wikidata.org/wiki/Q458825","display_name":"Kernel density estimation","level":3,"score":0.295199990272522},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25619998574256897}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.24963/ijcai.2025/12","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/12","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-11854","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/10852","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.24963/ijcai.2025/12","raw_type":"Conference Proceeding Article"}],"best_oa_location":{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-11854","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/10852","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.24963/ijcai.2025/12","raw_type":"Conference Proceeding Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multi-agent":[0],"reinforcement":[1],"learning":[2],"(MARL)":[3],"has":[4],"demonstrated":[5],"remarkable":[6],"success":[7],"in":[8,15,86],"collaborative":[9],"tasks,":[10,34],"yet":[11],"faces":[12],"significant":[13],"challenges":[14],"scaling":[16],"to":[17,108,143],"complex":[18,154],"scenarios":[19,129],"requiring":[20,99],"sustained":[21],"planning":[22,63,72],"and":[23,41,64,78,122],"coordination":[24,156],"across":[25],"long":[26],"horizons.":[27],"While":[28],"hierarchical":[29,52],"approaches":[30],"help":[31],"decompose":[32],"these":[33],"they":[35],"typically":[36],"rely":[37],"on":[38],"hand-crafted":[39],"subtasks":[40],"domain-specific":[42],"knowledge,":[43],"limiting":[44],"their":[45],"generalizability.":[46],"We":[47],"present":[48],"L2M2,":[49],"a":[50],"novel":[51],"framework":[53],"that":[54,73,91],"leverages":[55],"large":[56],"language":[57],"models":[58],"(LLMs)":[59],"for":[60,66,152],"high-level":[61],"strategic":[62],"MARL":[65,83,94],"low-level":[67],"execution.":[68],"L2M2":[69,115],"enables":[70],"zero-shot":[71],"supports":[74],"both":[75],"end-to-end":[76],"training":[77,105],"direct":[79],"integration":[80],"with":[81,119],"pre-trained":[82],"models.":[84],"Experiments":[85],"the":[87,104,112],"VMAS":[88],"environment":[89],"demonstrate":[90],"L2M2's":[92,141],"LLM-guided":[93],"achieves":[95],"superior":[96],"performance":[97,118],"while":[98],"less":[100],"than":[101],"20%":[102],"of":[103],"samples":[106],"compared":[107],"baseline":[109,131],"methods.":[110],"In":[111],"MOSMAC":[113],"environment,":[114],"demonstrates":[116],"strong":[117],"pre-defined":[120],"subgoals":[121,127],"maintains":[123],"substantial":[124],"effectiveness":[125],"without":[126],"-":[128],"where":[130],"methods":[132],"consistently":[133],"fail.":[134],"Analysis":[135],"through":[136],"kernel":[137],"density":[138],"estimation":[139],"reveals":[140],"ability":[142],"automatically":[144],"generate":[145],"appropriate":[146],"navigation":[147],"plans,":[148],"demonstrating":[149],"its":[150],"potential":[151],"addressing":[153],"multi-agent":[155],"tasks.":[157]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
