{"id":"https://openalex.org/W4399240167","doi":"https://doi.org/10.1142/s2972335324500042","title":"TWOSOME: An Efficient Online Framework to Align LLMs with Embodied Environments via Reinforcement Learning","display_name":"TWOSOME: An Efficient Online Framework to Align LLMs with Embodied Environments via Reinforcement Learning","publication_year":2024,"publication_date":"2024-05-31","ids":{"openalex":"https://openalex.org/W4399240167","doi":"https://doi.org/10.1142/s2972335324500042"},"language":"en","primary_location":{"id":"doi:10.1142/s2972335324500042","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s2972335324500042","pdf_url":null,"source":{"id":"https://openalex.org/S5407039934","display_name":"International Journal of Artificial Intelligence and Robotics Research","issn_l":"2972-3353","issn":["2972-3353","2972-3361"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Artificial Intelligence and Robotics Research","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065270160","display_name":"W.C. Tan","orcid":"https://orcid.org/0009-0008-4231-8777"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Weihao Tan","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0009-0008-4231-8777","affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100459881","display_name":"Wentao Zhang","orcid":"https://orcid.org/0009-0008-2767-6998"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Wentao Zhang","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0009-0008-2767-6998","affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031053184","display_name":"Shanqi Liu","orcid":"https://orcid.org/0000-0003-0583-2423"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shanqi Liu","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, China"],"raw_orcid":"https://orcid.org/0000-0003-0583-2423","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011940311","display_name":"Longtao Zheng","orcid":"https://orcid.org/0000-0002-7301-0862"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Longtao Zheng","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-7301-0862","affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049783685","display_name":"Xinrun Wang","orcid":"https://orcid.org/0000-0003-3369-219X"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xinrun Wang","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-3369-219X","affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017743551","display_name":"Bo An","orcid":"https://orcid.org/0000-0002-7064-7438"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Bo An","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore","Skywork AI, 2 Science Park Drive, Ascent Bridge+ #01-08, Singapore 118222, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-7064-7438","affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Skywork AI, 2 Science Park Drive, Ascent Bridge+ #01-08, Singapore 118222, Singapore","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9164,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78005423,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"01","issue":"02","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6952226161956787},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6357309818267822},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.6025524735450745},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4414187967777252},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.4312901496887207},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3646511733531952},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.08890476822853088}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6952226161956787},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6357309818267822},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.6025524735450745},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4414187967777252},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.4312901496887207},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3646511733531952},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.08890476822853088},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s2972335324500042","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s2972335324500042","pdf_url":null,"source":{"id":"https://openalex.org/S5407039934","display_name":"International Journal of Artificial Intelligence and Robotics Research","issn_l":"2972-3353","issn":["2972-3353","2972-3361"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Artificial Intelligence and Robotics Research","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8824738394","display_name":null,"funder_award_id":"ISG2-GC-2023-009","funder_id":"https://openalex.org/F4320320709","funder_display_name":"National Research Foundation Singapore"}],"funders":[{"id":"https://openalex.org/F4320320709","display_name":"National Research Foundation Singapore","ror":"https://ror.org/03cpyc314"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W59183349","https://openalex.org/W2799002257","https://openalex.org/W2999905431","https://openalex.org/W3109097593","https://openalex.org/W3116815090","https://openalex.org/W3148143011","https://openalex.org/W3172675210","https://openalex.org/W3211462570","https://openalex.org/W4205991051","https://openalex.org/W4212774754","https://openalex.org/W4322766882"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Despite":[0],"the":[1,20,23,30,58,93,98,114,119,141,175,220],"impressive":[2],"performance":[3,172],"across":[4],"numerous":[5],"tasks,":[6],"Large":[7],"Language":[8],"Models":[9],"(LLMs)":[10],"often":[11],"fail":[12],"in":[13,25,185],"solving":[14],"simple":[15],"decision-making":[16,72,188],"tasks":[17],"due":[18],"to":[19,49,74,107,112,161,174,207],"misalignment":[21],"of":[22,92,101,118,219],"knowledge":[24,52,91],"LLMs":[26,70,106],"with":[27,45,79,105,150],"environments.":[28,94],"On":[29],"contrary,":[31],"Reinforcement":[32],"Learning":[33],"(RL)":[34],"agents":[35,73],"learn":[36],"policies":[37],"from":[38,198],"scratch,":[39],"which":[40],"makes":[41],"them":[42],"always":[43],"align":[44,78],"environments":[46,81],"but":[47],"difficult":[48],"incorporate":[50],"prior":[51,90],"for":[53],"efficient":[54],"explorations.":[55],"To":[56],"narrow":[57],"gap,":[59],"we":[60,96,121,133],"propose":[61,122],"TWOSOME,":[62],"a":[63,135,228],"novel":[64,136],"general":[65],"online":[66,225],"framework":[67],"that":[68],"deploys":[69],"as":[71],"efficiently":[75],"interact":[76],"and":[77,116,126,143,171,180,191],"embodied":[80],"via":[82],"RL":[83,177],"without":[84],"requiring":[85],"any":[86],"prepared":[87],"datasets":[88],"or":[89],"First,":[95],"query":[97],"joint":[99],"probabilities":[100],"each":[102],"valid":[103],"action":[104],"form":[108],"behavior":[109],"policies.":[110],"Then,":[111],"enhance":[113],"stability":[115],"robustness":[117],"policies,":[120],"two":[123],"normalization":[124],"methods":[125],"summarize":[127],"four":[128],"prompt":[129,181],"design":[130,134],"principles.":[131],"Finally,":[132],"parameter-efficient":[137],"training":[138],"architecture":[139],"where":[140],"actor":[142],"critic":[144],"share":[145],"one":[146],"frozen":[147],"LLM":[148],"equipped":[149],"LOw-Rank":[151],"Adapters":[152],"(LoRA)":[153],"updated":[154],"by":[155],"PPO.":[156],"We":[157],"conduct":[158],"extensive":[159],"experiments":[160],"evaluate":[162],"TWOSOME.":[163],"(i)":[164],"TWOSOME":[165,202],"exhibits":[166],"significantly":[167],"better":[168],"sample":[169],"efficiency":[170],"compared":[173],"conventional":[176],"method,":[178,183],"PPO,":[179],"tuning":[182],"SayCan,":[184],"both":[186],"classical":[187],"environment,":[189,194],"Overcooked,":[190],"simulated":[192],"household":[193],"VirtualHome.":[195],"(ii)":[196],"Benefiting":[197],"LLMs\u2019":[199,221],"open-vocabulary":[200],"feature,":[201],"shows":[203],"superior":[204],"generalization":[205],"ability":[206,223],"unseen":[208],"tasks.":[209],"(iii)":[210],"Under":[211],"our":[212],"framework,":[213],"there":[214],"is":[215],"no":[216],"significant":[217],"loss":[218],"original":[222],"during":[224],"PPO":[226],"finetuning.":[227]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
