{"id":"https://openalex.org/W4401415888","doi":"https://doi.org/10.1109/icra57147.2024.10610216","title":"RoboVQA: Multimodal Long-Horizon Reasoning for Robotics","display_name":"RoboVQA: Multimodal Long-Horizon Reasoning for Robotics","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401415888","doi":"https://doi.org/10.1109/icra57147.2024.10610216"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610216","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610216","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066544075","display_name":"Pierre Sermanet","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":true,"raw_author_name":"Pierre Sermanet","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001795595","display_name":"Tianli Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Tianli Ding","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080095155","display_name":"Jeffrey Zhao","orcid":"https://orcid.org/0000-0001-7046-5434"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Jeffrey Zhao","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676786","display_name":"Fei Xia","orcid":"https://orcid.org/0009-0002-4609-9950"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Fei Xia","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047363556","display_name":"Debidatta Dwibedi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Debidatta Dwibedi","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025695094","display_name":"Keerthana Gopalakrishnan","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Keerthana Gopalakrishnan","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041653123","display_name":"Christine W. Chan","orcid":"https://orcid.org/0000-0003-0547-761X"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Christine Chan","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008880429","display_name":"Gabriel Dulac-Arnold","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Gabriel Dulac-Arnold","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014521485","display_name":"Sharath Maddineni","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Sharath Maddineni","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085810270","display_name":"Nikhil J Joshi","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Nikhil J Joshi","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021289514","display_name":"Pete Florence","orcid":"https://orcid.org/0000-0002-7148-5645"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Pete Florence","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100452682","display_name":"Wei Han","orcid":"https://orcid.org/0000-0002-1966-446X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Wei Han","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022072670","display_name":"Robert Baruch","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Robert Baruch","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102831128","display_name":"Yao Lu","orcid":"https://orcid.org/0000-0002-3147-2081"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Yao Lu","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008545107","display_name":"Suvir Mirchandani","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Suvir Mirchandani","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101929475","display_name":"Peng Xu","orcid":"https://orcid.org/0000-0002-6431-1110"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Peng Xu","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064611781","display_name":"Pannag Sanketi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Pannag Sanketi","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088777896","display_name":"Karol Hausman","orcid":"https://orcid.org/0000-0002-1504-6197"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Karol Hausman","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025419994","display_name":"Izhak Shafran","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Izhak Shafran","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018507768","display_name":"Brian Ichter","orcid":"https://orcid.org/0000-0002-6955-6432"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Brian Ichter","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074890965","display_name":"Yuan Cao","orcid":"https://orcid.org/0000-0002-1445-8210"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Yuan Cao","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":21,"corresponding_author_ids":["https://openalex.org/A5066544075"],"corresponding_institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"],"apc_list":null,"apc_paid":null,"fwci":8.0595,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.98328065,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"645","last_page":"652"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7065899968147278},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.678636908531189},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6120472550392151},{"id":"https://openalex.org/keywords/horizon","display_name":"Horizon","score":0.4502045214176178},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3370818495750427},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3011934161186218},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1195303201675415}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7065899968147278},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.678636908531189},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6120472550392151},{"id":"https://openalex.org/C159176650","wikidata":"https://www.wikidata.org/wiki/Q43261","display_name":"Horizon","level":2,"score":0.4502045214176178},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3370818495750427},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3011934161186218},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1195303201675415},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610216","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610216","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W2153738822","https://openalex.org/W2556388456","https://openalex.org/W2739107216","https://openalex.org/W2798963049","https://openalex.org/W2899575547","https://openalex.org/W2963177403","https://openalex.org/W2963541336","https://openalex.org/W2964220823","https://openalex.org/W2984008963","https://openalex.org/W3023742835","https://openalex.org/W3025552214","https://openalex.org/W3175961224","https://openalex.org/W3193402170","https://openalex.org/W3197457832","https://openalex.org/W3203663566","https://openalex.org/W3205786327","https://openalex.org/W3217340782","https://openalex.org/W4221143046","https://openalex.org/W4224308101","https://openalex.org/W4229042118","https://openalex.org/W4234886047","https://openalex.org/W4286892945","https://openalex.org/W4296406182","https://openalex.org/W4311252325","https://openalex.org/W4312230726","https://openalex.org/W4323572061","https://openalex.org/W4366999541","https://openalex.org/W4367333207","https://openalex.org/W4379255983","https://openalex.org/W4383109478","https://openalex.org/W4385430679","https://openalex.org/W4390874280","https://openalex.org/W4391759936","https://openalex.org/W4394659899","https://openalex.org/W4401415888","https://openalex.org/W6738212785","https://openalex.org/W6749916090","https://openalex.org/W6750355821","https://openalex.org/W6777076129","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6800139874","https://openalex.org/W6802366246","https://openalex.org/W6803096969","https://openalex.org/W6809509765","https://openalex.org/W6809646742","https://openalex.org/W6810081322","https://openalex.org/W6811013733","https://openalex.org/W6811072154","https://openalex.org/W6842585177","https://openalex.org/W6847435255","https://openalex.org/W6850503672","https://openalex.org/W6852136651","https://openalex.org/W6853754142","https://openalex.org/W6864544085"],"related_works":["https://openalex.org/W1508899372","https://openalex.org/W2012658348","https://openalex.org/W4233452137","https://openalex.org/W4254857216","https://openalex.org/W4231626925","https://openalex.org/W2962829499","https://openalex.org/W2767518918","https://openalex.org/W2131444840","https://openalex.org/W2414929284","https://openalex.org/W192849109"],"abstract_inverted_index":{"We":[0,36,90,119,142,175],"present":[1],"a":[2,101,121,171,177,192,204,241],"scalable,":[3],"bottom-up":[4],"and":[5,20,23,52,97,123,222,288,303,318,324],"intrinsically":[6],"diverse":[7,125],"data":[8,39,81,245,259],"collection":[9,95,114,260],"scheme":[10],"that":[11,24,67,99,187,240,265],"can":[12,293],"be":[13,248,294,311],"used":[14,295],"for":[15,100,137,250,257,321],"high-level":[16,196],"reasoning":[17,197],"with":[18,59,116,150,162,203,235,272],"long":[19],"medium":[21],"horizons":[22],"has":[25],"2.2x":[26],"higher":[27],"throughput":[28],"compared":[29],"to":[30,107,157,225,247,285,310],"traditional":[31],"narrow":[32],"top-down":[33],"step-by-step":[34],"collection.":[35,118],"collect":[37],"realistic":[38,201],"by":[40],"performing":[41,155,191],"any":[42],"user":[43],"requests":[44],"within":[45],"the":[46,79,92,111,211,215,254,291],"entirety":[47],"of":[48,94,110,190,194,214,243,278],"3":[49],"office":[50],"buildings":[51],"using":[53],"multiple":[54],"embodiments":[55,72],"(robot,":[56],"human,":[57],"human":[58,113,163],"grasping":[60],"tool).":[61],"With":[62],"this":[63],"data,":[64],"we":[65,263],"show":[66,264],"models":[68,238],"trained":[69,77,183],"on":[70,78,87,184],"all":[71,281],"perform":[73],"better":[74],"than":[75,210,314],"ones":[76],"robot":[80,88,117,148],"only,":[82],"even":[83,165],"when":[84],"evaluated":[85],"solely":[86],"episodes.":[89],"explore":[91],"economics":[93],"costs":[96],"find":[98],"fixed":[102],"budget":[103],"it":[104,160],"is":[105,188,223],"beneficial":[106],"take":[108],"advantage":[109],"cheaper":[112],"along":[115],"release":[120],"large":[122],"highly":[124],"(29,520":[126],"unique":[127],"instructions)":[128],"dataset":[129,186,289],"dubbed":[130],"RoboVQA":[131],"containing":[132],"829,502":[133],"(video,":[134],"text)":[135],"pairs":[136],"robotics-focused":[138],"visual":[139,217],"question":[140],"answering.":[141],"also":[143,169],"demonstrate":[144,176],"how":[145],"evaluating":[146],"real":[147,227],"experiments":[149],"an":[151,273],"intervention":[152,206],"mechanism":[153],"enables":[154],"tasks":[156,198],"completion,":[158],"making":[159],"deployable":[161],"oversight":[164],"if":[166],"imperfect":[167],"while":[168],"providing":[170],"single":[172,178],"performance":[173,233],"metric.":[174],"video-conditioned":[179],"model":[180,219,292],"named":[181],"RoboVQA-VideoCoCa":[182],"our":[185],"capable":[189],"variety":[193],"grounded":[195,244],"in":[199,305],"broad":[200],"settings":[202],"cognitive":[205],"rate":[207,276],"46%":[208],"lower":[209],"zeroshot":[212],"state":[213],"art":[216],"language":[218],"(VLM)":[220],"baseline":[221],"able":[224],"guide":[226],"robots":[228],"through":[229],"long-horizon":[230],"tasks.":[231,283],"The":[232],"gap":[234],"zero-shot":[236],"state-of-the-art":[237],"indicates":[239],"lot":[242],"remains":[246],"collected":[249],"real-world":[251],"deployment,":[252],"emphasizing":[253],"critical":[255],"need":[256],"scalable":[258],"approaches.":[261],"Finally,":[262],"video":[266,286,298],"VLMs":[267,271],"significantly":[268],"outperform":[269],"single-image":[270],"average":[274],"error":[275],"reduction":[277],"19%":[279],"across":[280],"VQA":[282],"Thanks":[284],"conditioning":[287],"diversity,":[290],"as":[296],"general":[297],"value":[299],"functions":[300],"(e.g.":[301],"success":[302],"affordance)":[304],"situations":[306],"where":[307],"actions":[308],"needs":[309],"recognized":[312],"rather":[313],"states,":[315],"expanding":[316],"capabilities":[317],"environment":[319],"understanding":[320],"robots.":[322],"Data":[323],"videos":[325],"are":[326],"available":[327],"at":[328],"robovqa.github.io":[329]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":24},{"year":2024,"cited_by_count":5}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
