{"id":"https://openalex.org/W4396877948","doi":"https://doi.org/10.1109/lra.2024.3400189","title":"RLingua: Improving Reinforcement Learning Sample Efficiency in Robotic Manipulations With Large Language Models","display_name":"RLingua: Improving Reinforcement Learning Sample Efficiency in Robotic Manipulations With Large Language Models","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4396877948","doi":"https://doi.org/10.1109/lra.2024.3400189"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2024.3400189","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3400189","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100729607","display_name":"Liangliang Chen","orcid":"https://orcid.org/0000-0002-9594-640X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Liangliang Chen","raw_affiliation_strings":["Robotics and Autonomous Driving Lab, Baidu Research, Sunnyvale, CA, USA","School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, USA"],"affiliations":[{"raw_affiliation_string":"Robotics and Autonomous Driving Lab, Baidu Research, Sunnyvale, CA, USA","institution_ids":[]},{"raw_affiliation_string":"School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106404175","display_name":"Yutian Lei","orcid":"https://orcid.org/0009-0006-5793-2513"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yutian Lei","raw_affiliation_strings":["Robotics and Autonomous Driving Lab, Baidu Research, Sunnyvale, CA, USA"],"affiliations":[{"raw_affiliation_string":"Robotics and Autonomous Driving Lab, Baidu Research, Sunnyvale, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050808028","display_name":"Shiyu Jin","orcid":"https://orcid.org/0000-0002-6475-5942"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shiyu Jin","raw_affiliation_strings":["Robotics and Autonomous Driving Lab, Baidu Research, Sunnyvale, CA, USA"],"affiliations":[{"raw_affiliation_string":"Robotics and Autonomous Driving Lab, Baidu Research, Sunnyvale, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100386001","display_name":"Ying Zhang","orcid":"https://orcid.org/0000-0001-5246-2141"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ying Zhang","raw_affiliation_strings":["School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101843626","display_name":"Liangjun Zhang","orcid":"https://orcid.org/0000-0001-5737-2540"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liangjun Zhang","raw_affiliation_strings":["Robotics and Autonomous Driving Lab, Baidu Research, Sunnyvale, CA, USA"],"affiliations":[{"raw_affiliation_string":"Robotics and Autonomous Driving Lab, Baidu Research, Sunnyvale, CA, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100729607"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":6.9503,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.97331555,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"9","issue":"7","first_page":"6075","last_page":"6082"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9567999839782715,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9567999839782715,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9320999979972839,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7951154708862305},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6390207409858704},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5588499307632446},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5485221147537231},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47048428654670715},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.282459557056427},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.05461341142654419},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.0429520308971405}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7951154708862305},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6390207409858704},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5588499307632446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5485221147537231},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47048428654670715},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.282459557056427},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.05461341142654419},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0429520308971405},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2024.3400189","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3400189","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2741122588","https://openalex.org/W2746553466","https://openalex.org/W2788862220","https://openalex.org/W2902907165","https://openalex.org/W2963099939","https://openalex.org/W3007769740","https://openalex.org/W3121342653","https://openalex.org/W4281763794","https://openalex.org/W4287113380","https://openalex.org/W4360836968","https://openalex.org/W4383097638","https://openalex.org/W4386320380","https://openalex.org/W4387294588","https://openalex.org/W4387595951","https://openalex.org/W4393023499","https://openalex.org/W6640174482","https://openalex.org/W6677067356","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6740801417","https://openalex.org/W6742461812","https://openalex.org/W6748839928","https://openalex.org/W6755421158","https://openalex.org/W6809509765","https://openalex.org/W6809646742","https://openalex.org/W6810640255","https://openalex.org/W6810738896","https://openalex.org/W6839928859","https://openalex.org/W6849548236","https://openalex.org/W6850936240","https://openalex.org/W6853664340","https://openalex.org/W6853677386","https://openalex.org/W6854929498","https://openalex.org/W6856471725","https://openalex.org/W6857151620","https://openalex.org/W6857614378","https://openalex.org/W6857732217","https://openalex.org/W6858048387","https://openalex.org/W6862975523"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,125],"(RL)":[2],"has":[3],"demonstrated":[4],"its":[5,15],"capability":[6],"in":[7,45,80,157,170,176,191],"solving":[8],"various":[9],"tasks":[10,160,175],"but":[11],"is":[12,91],"notorious":[13],"for":[14,56,73],"low":[16],"sample":[17,41,106,153],"efficiency.":[18,107],"In":[19],"this":[20,49],"paper,":[21],"we":[22,51,187],"propose":[23],"RLingua,":[24],"a":[25,54,68,74,81,100,133],"framework":[26],"that":[27,67,147,198],"can":[28,77,149],"leverage":[29],"the":[30,40,58,87,111,118,123,127,138,152,182,199],"internal":[31],"knowledge":[32,60],"of":[33,43,61,136,155,161],"large":[34],"language":[35],"models":[36],"(LLMs)":[37],"to":[38,93,121,205],"reduce":[39,151],"complexity":[42,154],"RL":[44,113],"robotic":[46],"manipulations.":[47],"To":[48],"end,":[50],"first":[52],"present":[53],"method":[55,135],"extracting":[57],"prior":[59],"LLMs":[62],"by":[63,143],"prompt":[64],"engineering":[65],"so":[66],"preliminary":[69],"rule-based":[70],"robot":[71,89,141,159,174,193,207],"controller":[72,90],"specific":[75],"task":[76],"be":[78],"generated":[79],"user-friendly":[82],"manner.":[83],"Despite":[84],"being":[85],"imperfect,":[86],"LLM-generated":[88,128,140],"utilized":[92],"produce":[94],"action":[95],"samples":[96],"during":[97],"rollouts":[98],"with":[99],"decaying":[101],"probability,":[102],"thereby":[103],"improving":[104,137],"RL's":[105],"We":[108,145],"employ":[109],"TD3,":[110],"widely-used":[112],"baseline":[114],"method,":[115],"and":[116,165],"modify":[117],"actor":[119],"loss":[120],"regularize":[122],"policy":[124],"towards":[126],"controller.":[129],"RLingua":[130,148],"also":[131],"provides":[132],"novel":[134],"imperfect":[139],"controllers":[142],"RL.":[144],"demonstrate":[146],"significantly":[150],"TD3":[156,184],"four":[158],"<monospace":[162,177],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[163,178,214],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">panda_gym</monospace>":[164],"achieve":[166],"high":[167],"success":[168],"rates":[169],"12":[171],"sparsely":[172],"rewarded":[173],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">RLBench</monospace>":[179],",":[180],"where":[181],"standard":[183],"fails.":[185],"Additionally,":[186],"validated":[188],"RLingua's":[189],"effectiveness":[190],"real-world":[192],"experiments":[194],"through":[195],"Sim2Real,":[196],"demonstrating":[197],"learned":[200],"policies":[201],"are":[202],"effectively":[203],"transferable":[204],"real":[206],"tasks.":[208],"For":[209],"videos,":[210],"please":[211],"visit":[212],"<uri":[213],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://rlingua.github.io</uri>":[215],".":[216]},"counts_by_year":[{"year":2025,"cited_by_count":19},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
