{"id":"https://openalex.org/W4414359780","doi":"https://doi.org/10.24963/ijcai.2025/1181","title":"The Evolving Landscape of LLM- and VLM-Integrated Reinforcement Learning","display_name":"The Evolving Landscape of LLM- and VLM-Integrated Reinforcement Learning","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414359780","doi":"https://doi.org/10.24963/ijcai.2025/1181"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/1181","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/1181","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027237583","display_name":"Sheila Schoepp","orcid":null},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Sheila Schoepp","raw_affiliation_strings":["University of Alberta"],"affiliations":[{"raw_affiliation_string":"University of Alberta","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115648109","display_name":"Masoud Jafaripour","orcid":null},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Masoud Jafaripour","raw_affiliation_strings":["University of Alberta"],"affiliations":[{"raw_affiliation_string":"University of Alberta","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109663270","display_name":"Yingyue Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Yingyue Cao","raw_affiliation_strings":["University of Alberta"],"affiliations":[{"raw_affiliation_string":"University of Alberta","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101619771","display_name":"Tianpei Yang","orcid":"https://orcid.org/0000-0002-5497-7146"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianpei Yang","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057580299","display_name":"Fatemeh Abdollahi","orcid":"https://orcid.org/0000-0001-7690-0089"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Fatemeh Abdollahi","raw_affiliation_strings":["University of Alberta"],"affiliations":[{"raw_affiliation_string":"University of Alberta","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003674260","display_name":"Shadan Golestan","orcid":"https://orcid.org/0000-0001-7906-2287"},"institutions":[{"id":"https://openalex.org/I125680101","display_name":"Turing Institute","ror":"https://ror.org/02x2mw849","country_code":"GB","type":"facility","lineage":["https://openalex.org/I125680101"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shadan Golestan","raw_affiliation_strings":["Alberta Machine Intelligence Institute"],"affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute","institution_ids":["https://openalex.org/I125680101"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028789420","display_name":"Zahin Sufiyan","orcid":null},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Zahin Sufiyan","raw_affiliation_strings":["University of Alberta"],"affiliations":[{"raw_affiliation_string":"University of Alberta","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053745515","display_name":"Osmar R. Za\u0131\u0308ane","orcid":"https://orcid.org/0000-0002-0060-5988"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]},{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Osmar R. Zaiane","raw_affiliation_strings":["Alberta Machine Intelligence Institute (Amii)","University of Alberta"],"affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute (Amii)","institution_ids":["https://openalex.org/I1343180700"]},{"raw_affiliation_string":"University of Alberta","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5105123683","display_name":"Matthew E. Taylor","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Matthew E. Taylor","raw_affiliation_strings":["Alberta Machine Intelligence Institute (Amii)","University of Alberta"],"affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute (Amii)","institution_ids":["https://openalex.org/I1343180700"]},{"raw_affiliation_string":"University of Alberta","institution_ids":["https://openalex.org/I154425047"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5027237583"],"corresponding_institution_ids":["https://openalex.org/I154425047"],"apc_list":null,"apc_paid":null,"fwci":1.3682,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.85069727,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"10641","last_page":"10649"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.868399977684021,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.868399977684021,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5152999758720398},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4968000054359436},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.48410001397132874},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.3776000142097473},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.34950000047683716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5238999724388123},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5152999758720398},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4968000054359436},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.48410001397132874},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.3776000142097473},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3546000123023987},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.34950000047683716},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.33250001072883606},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3319999873638153},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2962999939918518},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2930999994277954}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/1181","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/1181","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"has":[3],"shown":[4],"impressive":[5,23],"results":[6],"in":[7,25,50,61],"sequential":[8,136],"decision-making":[9],"tasks.":[10],"Large":[11],"Language":[12],"Models":[13,17],"(LLMs)":[14],"and":[15,28,41,53,71,89,103,110,122,132],"Vision-Language":[16],"(VLMs)":[18],"have":[19,32],"recently":[20],"emerged,":[21],"exhibiting":[22],"capabilities":[24],"multimodal":[26],"understanding":[27,134],"reasoning.":[29],"These":[30],"advances":[31],"led":[33],"to":[34,57],"a":[35,76,117],"surge":[36],"of":[37,66],"research":[38,109],"integrating":[39,120],"LLMs":[40,52,121],"VLMs":[42,54,123],"into":[43,84,124],"RL.":[44],"This":[45],"survey":[46,115],"reviews":[47],"representative":[48],"works":[49],"which":[51],"are":[55],"used":[56],"overcome":[58],"key":[59],"challenges":[60],"RL,":[62,125],"such":[63],"as":[64],"lack":[65],"prior":[67],"knowledge,":[68],"long-horizon":[69],"planning,":[70],"reward":[72],"design.":[73],"We":[74,91],"present":[75],"taxonomy":[77],"that":[78,128],"categorizes":[79],"these":[80],"LLM/VLM-assisted":[81],"RL":[82],"approaches":[83,127],"three":[85],"roles:":[86],"agent,":[87],"planner,":[88],"reward.":[90],"conclude":[92],"by":[93],"exploring":[94],"open":[95],"problems,":[96],"including":[97],"grounding,":[98],"bias":[99],"mitigation,":[100],"improved":[101],"representations,":[102],"action":[104],"advice.":[105],"By":[106],"consolidating":[107],"existing":[108],"identifying":[111],"future":[112],"directions,":[113],"this":[114],"establishes":[116],"framework":[118],"for":[119],"advancing":[126],"unify":[129],"natural":[130],"language":[131],"visual":[133],"with":[135],"decision-making.":[137]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
