{"id":"https://openalex.org/W4408351960","doi":"https://doi.org/10.1109/icassp49660.2025.10888998","title":"Sample Efficient Reinforcement Learning via Large Vision Language Model Distillation","display_name":"Sample Efficient Reinforcement Learning via Large Vision Language Model Distillation","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408351960","doi":"https://doi.org/10.1109/icassp49660.2025.10888998"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888998","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888998","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.11221","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100422970","display_name":"Dong\u2010Hoon Lee","orcid":"https://orcid.org/0000-0002-5013-4440"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Donghoon Lee","raw_affiliation_strings":["KAIST,Robotics Program,Daejeon,South Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,Robotics Program,Daejeon,South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028393159","display_name":"Tung M. Luu","orcid":"https://orcid.org/0000-0001-9488-7463"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Tung M. Luu","raw_affiliation_strings":["KAIST,Electrical Engineering,Daejeon,South Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,Electrical Engineering,Daejeon,South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101422405","display_name":"Young-Hwan Lee","orcid":"https://orcid.org/0009-0002-2310-3056"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Younghwan Lee","raw_affiliation_strings":["KAIST,Electrical Engineering,Daejeon,South Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,Electrical Engineering,Daejeon,South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073287748","display_name":"Chang D. Yoo","orcid":"https://orcid.org/0000-0002-0756-7179"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Chang D. Yoo","raw_affiliation_strings":["KAIST,Electrical Engineering,Daejeon,South Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,Electrical Engineering,Daejeon,South Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100422970"],"corresponding_institution_ids":["https://openalex.org/I157485424"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03332752,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9369000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9369000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.794806957244873},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7618119716644287},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7049527764320374},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6711347103118896},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5751394629478455},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4531330466270447},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.441951721906662},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.0783759355545044},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.06786665320396423}],"concepts":[{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.794806957244873},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7618119716644287},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7049527764320374},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6711347103118896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5751394629478455},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4531330466270447},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.441951721906662},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0783759355545044},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.06786665320396423}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888998","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888998","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2505.11221","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.11221","pdf_url":"https://arxiv.org/pdf/2505.11221","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.11221","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.11221","pdf_url":"https://arxiv.org/pdf/2505.11221","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1845972764","https://openalex.org/W2201087437","https://openalex.org/W2260756217","https://openalex.org/W2736601468","https://openalex.org/W2909906617","https://openalex.org/W2962872206","https://openalex.org/W2989847975","https://openalex.org/W3100789280","https://openalex.org/W3141423234","https://openalex.org/W4246078117","https://openalex.org/W4285154955","https://openalex.org/W4383108457","https://openalex.org/W4392679398","https://openalex.org/W4405785192","https://openalex.org/W6638523607","https://openalex.org/W6675999342","https://openalex.org/W6685726866","https://openalex.org/W6732837357","https://openalex.org/W6748440607","https://openalex.org/W6753243525","https://openalex.org/W6763342845","https://openalex.org/W6772005887","https://openalex.org/W6778883912","https://openalex.org/W6800751262","https://openalex.org/W6809509765","https://openalex.org/W6810081322","https://openalex.org/W6810640255","https://openalex.org/W6839928859","https://openalex.org/W6844961931","https://openalex.org/W6849548236","https://openalex.org/W6849843017","https://openalex.org/W6849861922","https://openalex.org/W6850625674","https://openalex.org/W6852884379","https://openalex.org/W6852904746","https://openalex.org/W6853313673","https://openalex.org/W6857415689","https://openalex.org/W6857462693","https://openalex.org/W6858048387","https://openalex.org/W6860855263","https://openalex.org/W6861860832","https://openalex.org/W6866278122","https://openalex.org/W6867621267","https://openalex.org/W6871553294","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W2607437843","https://openalex.org/W4295700147","https://openalex.org/W2963162727","https://openalex.org/W2116157560","https://openalex.org/W2106714532","https://openalex.org/W2877093712","https://openalex.org/W4297979791","https://openalex.org/W4310614650"],"abstract_inverted_index":{"Recent":[0],"research":[1],"highlights":[2],"the":[3,74,87,97,105,112,123,130,143],"potential":[4],"of":[5,100,129,146],"multi-modal":[6],"foundation":[7],"models":[8,64],"in":[9,96],"tackling":[10],"complex":[11],"decision-making":[12],"challenges.":[13],"However,":[14],"their":[15],"large":[16,62],"parameters":[17],"make":[18],"real-world":[19],"deployment":[20],"resource-intensive":[21],"and":[22],"often":[23],"impractical":[24],"for":[25,33,125],"constrained":[26],"systems.":[27],"Reinforcement":[28],"learning":[29,107],"(RL)":[30],"shows":[31],"promise":[32],"task-specific":[34],"agents":[35],"but":[36],"suffers":[37],"from":[38,61,118],"high":[39],"sample":[40,144],"complexity,":[41],"limiting":[42],"practical":[43],"applications.":[44],"To":[45],"address":[46],"these":[47],"challenges,":[48],"we":[49,121],"introduce":[50],"LVLM":[51,75,113],"to":[52,114],"Policy":[53],"(LVLM2P),":[54],"a":[55,77],"novel":[56],"framework":[57],"that":[58,139],"distills":[59],"knowledge":[60],"vision-language":[63],"(LVLM)":[65],"into":[66],"more":[67],"efficient":[68],"RL":[69,88,148],"agents.":[70],"Our":[71],"approach":[72],"leverages":[73],"as":[76],"teacher,":[78],"providing":[79],"instructional":[80],"actions":[81,116],"based":[82],"on":[83],"trajectories":[84],"collected":[85],"by":[86,110],"agent,":[89],"which":[90],"helps":[91],"reduce":[92],"less":[93],"meaningful":[94],"exploration":[95],"early":[98],"stages":[99],"learning,":[101],"thereby":[102],"significantly":[103,141],"accelerating":[104],"agent\u2019s":[106],"progress.":[108],"Additionally,":[109],"leveraging":[111],"suggest":[115],"directly":[117],"visual":[119],"observations,":[120],"eliminate":[122],"need":[124],"manual":[126],"textual":[127],"descriptors":[128],"environment,":[131],"enhancing":[132],"applicability":[133],"across":[134],"diverse":[135],"tasks.":[136],"Experiments":[137],"show":[138],"LVLM2P":[140],"enhances":[142],"efficiency":[145],"baseline":[147],"algorithms.":[149],"The":[150],"code":[151],"is":[152],"available":[153],"at":[154],"https://github.com/i22024/LVLM2P":[155]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
