{"id":"https://openalex.org/W4417130870","doi":"https://doi.org/10.1109/tase.2025.3637862","title":"EALLMs: Environment-Aligned LLMs for Enhanced Exploration and Communication in Multi-Agent Reinforcement Learning","display_name":"EALLMs: Environment-Aligned LLMs for Enhanced Exploration and Communication in Multi-Agent Reinforcement Learning","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W4417130870","doi":"https://doi.org/10.1109/tase.2025.3637862"},"language":null,"primary_location":{"id":"doi:10.1109/tase.2025.3637862","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2025.3637862","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109676488","display_name":"Zhuohui Zhang","orcid":"https://orcid.org/0009-0000-5040-9654"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhuohui Zhang","raw_affiliation_strings":["Department of Control Science and Engineering, Tongji University, Shanghai, China","Department of Control Science &#x0026; Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Department of Control Science &#x0026; Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076471974","display_name":"Bin Cheng","orcid":"https://orcid.org/0000-0003-0281-4860"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Cheng","raw_affiliation_strings":["Department of Control Science and Engineering, Tongji University, Shanghai, China","Department of Control Science &#x0026; Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Department of Control Science &#x0026; Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049557511","display_name":"Bin He","orcid":"https://orcid.org/0000-0003-3193-6269"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin He","raw_affiliation_strings":["Department of Control Science and Engineering, Tongji University, Shanghai, China","Department of Control Science &#x0026; Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Department of Control Science &#x0026; Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5109676488"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20449977,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"23","issue":null,"first_page":"1009","last_page":"1020"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.1615000069141388,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.1615000069141388,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.15800000727176666,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.12710000574588776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8084999918937683},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5095000267028809},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.3709999918937683},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.3431999981403351},{"id":"https://openalex.org/keywords/integrator","display_name":"Integrator","score":0.33399999141693115},{"id":"https://openalex.org/keywords/state-information","display_name":"State information","score":0.31049999594688416}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8084999918937683},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6247000098228455},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5095000267028809},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41440001130104065},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4068000018596649},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.3709999918937683},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.3431999981403351},{"id":"https://openalex.org/C79518650","wikidata":"https://www.wikidata.org/wiki/Q2081431","display_name":"Integrator","level":3,"score":0.33399999141693115},{"id":"https://openalex.org/C2985963534","wikidata":"https://www.wikidata.org/wiki/Q7603704","display_name":"State information","level":3,"score":0.31049999594688416},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.29820001125335693},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.28870001435279846},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2741999924182892},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26510000228881836},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.26489999890327454},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.25200000405311584},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tase.2025.3637862","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2025.3637862","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1289750502","display_name":null,"funder_award_id":"62103302","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2575069074","display_name":null,"funder_award_id":"62088101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2768245115","display_name":null,"funder_award_id":"62495094","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6431279487","display_name":null,"funder_award_id":"62573322","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8755924454","display_name":null,"funder_award_id":"24QA2709400","funder_id":"https://openalex.org/F4320327803","funder_display_name":"Shanghai Rising-Star Program"},{"id":"https://openalex.org/G8848870758","display_name":null,"funder_award_id":"22120240276","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327803","display_name":"Shanghai Rising-Star Program","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1542941925","https://openalex.org/W2982316857","https://openalex.org/W3034723486","https://openalex.org/W3174770825","https://openalex.org/W3199748991","https://openalex.org/W4205991051","https://openalex.org/W4213053159","https://openalex.org/W4383097638","https://openalex.org/W4385573990","https://openalex.org/W4389179347","https://openalex.org/W4392309410","https://openalex.org/W4393404844","https://openalex.org/W4394564139","https://openalex.org/W4396909955","https://openalex.org/W4399665804","https://openalex.org/W4404576592","https://openalex.org/W4406727889","https://openalex.org/W4407467601","https://openalex.org/W4409348008","https://openalex.org/W4409723389","https://openalex.org/W4409796715","https://openalex.org/W4411119815","https://openalex.org/W4412719127"],"related_works":[],"abstract_inverted_index":{"Leveraging":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"for":[5,66,97],"collaborative":[6],"sequential":[7],"decision-making":[8],"is":[9,70],"a":[10,63],"significant":[11],"challenge,":[12],"despite":[13],"strong":[14],"semantic":[15],"understanding":[16],"and":[17,38,69,118,125,131],"extensive":[18],"prior":[19],"knowledge.":[20],"Conversely,":[21],"multi-agent":[22,108],"reinforcement":[23],"learning":[24],"(MARL)":[25],"can":[26],"learn":[27],"environment-aligned":[28,53],"policies":[29],"through":[30,72],"interaction,":[31],"but":[32],"often":[33],"suffers":[34],"from":[35],"inefficient":[36],"exploration":[37,123],"heavy":[39],"reliance":[40],"on":[41,128,146],"centralized":[42],"global":[43,95,148],"state":[44,96],"information.":[45],"To":[46],"achieve":[47,76,141],"complementary":[48],"advantages,":[49],"we":[50,101],"propose":[51],"the":[52,79,129,152,156],"LLMs":[54],"(EALLMs).":[55],"In":[56],"our":[57],"framework,":[58],"an":[59,90],"LLM":[60,157],"serves":[61],"as":[62,89],"shared":[64],"policy":[65],"all":[67],"agents":[68],"updated":[71],"online":[73],"MARL":[74,117],"to":[75,93,107,140],"alignment":[77],"with":[78,85],"environment.":[80],"Simultaneously,":[81],"another":[82],"LLM,":[83],"fine-tuned":[84],"offline":[86],"datasets,":[87],"acts":[88],"information":[91],"integrator":[92],"generate":[94],"communication":[98],"purposes.":[99],"Additionally,":[100],"design":[102],"robust,":[103],"task-specific":[104],"prompts":[105],"tailored":[106],"systems.":[109],"Extensive":[110],"experiments":[111],"demonstrate":[112],"that":[113],"EALLMs":[114],"outperform":[115],"classical":[116],"LLM-based":[119],"baselines":[120],"in":[121],"both":[122],"efficiency":[124],"overall":[126],"performance":[127],"SMAC":[130],"SMACv2":[132],"benchmarks.":[133],"Ablation":[134],"studies":[135],"further":[136],"confirm":[137],"EALLMs\u2019":[138],"ability":[139],"competitive":[142],"results":[143],"without":[144],"relying":[145],"explicit":[147],"state,":[149],"while":[150],"preserving":[151],"original":[153],"capabilities":[154],"of":[155],"during":[158],"alignment.":[159]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-12-08T00:00:00"}
