{"id":"https://openalex.org/W4388327601","doi":"https://doi.org/10.48550/arxiv.2311.01378","title":"Vision-Language Foundation Models as Effective Robot Imitators","display_name":"Vision-Language Foundation Models as Effective Robot Imitators","publication_year":2023,"publication_date":"2023-11-02","ids":{"openalex":"https://openalex.org/W4388327601","doi":"https://doi.org/10.48550/arxiv.2311.01378"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2311.01378","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.01378","pdf_url":"https://arxiv.org/pdf/2311.01378","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2311.01378","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015021270","display_name":"Xinghang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Xinghang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101433372","display_name":"Minghuan Liu","orcid":"https://orcid.org/0009-0009-5585-1746"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Minghuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074050672","display_name":"Hanbo Zhang","orcid":"https://orcid.org/0000-0002-2235-6855"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hanbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030780415","display_name":"Cunjun Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Cunjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044771462","display_name":"Jie Xu","orcid":"https://orcid.org/0000-0002-0515-1647"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101733307","display_name":"Hongtao Wu","orcid":"https://orcid.org/0009-0007-4863-5119"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Hongtao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071789313","display_name":"Chilam Cheang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheang, Chilam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101231251","display_name":"Ya Jing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jing, Ya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090720315","display_name":"Weinan Zhang","orcid":"https://orcid.org/0000-0002-0127-2425"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Weinan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041101317","display_name":"Huaping Liu","orcid":"https://orcid.org/0000-0002-4042-6044"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Huaping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100455139","display_name":"Hang Li","orcid":"https://orcid.org/0000-0002-6221-6195"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5065667064","display_name":"Tao Kong","orcid":"https://orcid.org/0000-0002-9412-1457"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kong, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5015021270"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9710000157356262,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.7703553438186646},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7533233165740967},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7343716621398926},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.6111023426055908},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.6068887114524841},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.5430938005447388},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5358214378356934},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.499178409576416},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4817662239074707},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.43333548307418823},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3028082847595215},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.17366984486579895}],"concepts":[{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.7703553438186646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7533233165740967},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7343716621398926},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.6111023426055908},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.6068887114524841},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.5430938005447388},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5358214378356934},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.499178409576416},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4817662239074707},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.43333548307418823},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3028082847595215},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.17366984486579895},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2311.01378","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.01378","pdf_url":"https://arxiv.org/pdf/2311.01378","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2311.01378","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2311.01378","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2311.01378","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.01378","pdf_url":"https://arxiv.org/pdf/2311.01378","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525","https://openalex.org/W2770234245"],"abstract_inverted_index":{"Recent":[0],"progress":[1],"in":[2],"vision":[3,18],"language":[4,19],"foundation":[5],"models":[6,35,74],"has":[7,161],"shown":[8],"their":[9,181],"ability":[10,178],"to":[11,133,136,164,179],"understand":[12],"multimodal":[13],"data":[14],"and":[15,50,83,105,130,168],"resolve":[16],"complicated":[17],"tasks,":[20],"including":[21],"robotics":[22,41,172,183],"manipulation.":[23],"We":[24,158],"seek":[25],"a":[26,48,96,116,166],"straightforward":[27],"way":[28],"of":[29,32,151],"making":[30],"use":[31],"existing":[33],"vision-language":[34,52,72],"(VLMs)":[36],"with":[37,78,115,176],"simple":[38,49],"fine-tuning":[39],"on":[40,91,107,119,155],"data.":[42],"To":[43],"this":[44],"end,":[45],"we":[46,123],"derive":[47],"novel":[51],"manipulation":[53,93,156],"framework,":[54],"dubbed":[55],"RoboFlamingo,":[56],"built":[57],"upon":[58],"the":[59,100,112,120,149,162,177],"open-source":[60],"VLMs,":[61],"OpenFlamingo.":[62],"Unlike":[63],"prior":[64],"works,":[65],"RoboFlamingo":[66,99,125,160],"utilizes":[67],"pre-trained":[68,153],"VLMs":[69,135,154],"for":[70,102,171],"single-step":[71],"comprehension,":[73],"sequential":[75],"history":[76],"information":[77],"an":[79,128],"explicit":[80],"policy":[81],"head,":[82],"is":[84],"slightly":[85],"fine-tuned":[86],"by":[87],"imitation":[88],"learning":[89],"only":[90],"language-conditioned":[92],"datasets.":[94],"Such":[95],"decomposition":[97],"provides":[98],"flexibility":[101],"open-loop":[103],"control":[104],"deployment":[106],"low-performance":[108],"platforms.":[109],"By":[110],"exceeding":[111],"state-of-the-art":[113],"performance":[114],"large":[117],"margin":[118],"tested":[121],"benchmark,":[122],"show":[124],"can":[126],"be":[127,165],"effective":[129],"competitive":[131],"alternative":[132],"adapt":[134],"robot":[137],"control.":[138],"Our":[139],"extensive":[140],"experimental":[141],"results":[142],"also":[143],"reveal":[144],"several":[145],"interesting":[146],"conclusions":[147],"regarding":[148],"behavior":[150],"different":[152],"tasks.":[157],"believe":[159],"potential":[163],"cost-effective":[167],"easy-to-use":[169],"solution":[170],"manipulation,":[173],"empowering":[174],"everyone":[175],"fine-tune":[180],"own":[182],"policy.":[184]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
