{"id":"https://openalex.org/W4401415710","doi":"https://doi.org/10.1109/icra57147.2024.10611112","title":"Vision-Language Interpreter for Robot Task Planning","display_name":"Vision-Language Interpreter for Robot Task Planning","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401415710","doi":"https://doi.org/10.1109/icra57147.2024.10611112"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10611112","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611112","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044045808","display_name":"Keisuke Shirai","orcid":"https://orcid.org/0000-0002-1403-8128"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keisuke Shirai","raw_affiliation_strings":["Kyoto University,Kyoto,Japan,606-8501"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kyoto University,Kyoto,Japan,606-8501","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001592945","display_name":"Cristian C. Beltran-Hernandez","orcid":"https://orcid.org/0000-0002-1134-009X"},"institutions":[{"id":"https://openalex.org/I146230289","display_name":"Omron (Japan)","ror":"https://ror.org/00q0w1h45","country_code":"JP","type":"company","lineage":["https://openalex.org/I146230289"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Cristian C. Beltran-Hernandez","raw_affiliation_strings":["OMRON SINIC X Corporation,Tokyo,Japan,113-0033"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OMRON SINIC X Corporation,Tokyo,Japan,113-0033","institution_ids":["https://openalex.org/I146230289"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034718334","display_name":"Masashi Hamaya","orcid":"https://orcid.org/0000-0003-4189-8219"},"institutions":[{"id":"https://openalex.org/I146230289","display_name":"Omron (Japan)","ror":"https://ror.org/00q0w1h45","country_code":"JP","type":"company","lineage":["https://openalex.org/I146230289"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masashi Hamaya","raw_affiliation_strings":["OMRON SINIC X Corporation,Tokyo,Japan,113-0033"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OMRON SINIC X Corporation,Tokyo,Japan,113-0033","institution_ids":["https://openalex.org/I146230289"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038408644","display_name":"Atsushi Hashimoto","orcid":"https://orcid.org/0000-0002-0799-4269"},"institutions":[{"id":"https://openalex.org/I146230289","display_name":"Omron (Japan)","ror":"https://ror.org/00q0w1h45","country_code":"JP","type":"company","lineage":["https://openalex.org/I146230289"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Atsushi Hashimoto","raw_affiliation_strings":["OMRON SINIC X Corporation,Tokyo,Japan,113-0033"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OMRON SINIC X Corporation,Tokyo,Japan,113-0033","institution_ids":["https://openalex.org/I146230289"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071741462","display_name":"Shohei Tanaka","orcid":"https://orcid.org/0000-0002-4257-5342"},"institutions":[{"id":"https://openalex.org/I146230289","display_name":"Omron (Japan)","ror":"https://ror.org/00q0w1h45","country_code":"JP","type":"company","lineage":["https://openalex.org/I146230289"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shohei Tanaka","raw_affiliation_strings":["OMRON SINIC X Corporation,Tokyo,Japan,113-0033"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OMRON SINIC X Corporation,Tokyo,Japan,113-0033","institution_ids":["https://openalex.org/I146230289"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087094940","display_name":"Kento Kawaharazuka","orcid":"https://orcid.org/0000-0002-7464-7187"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kento Kawaharazuka","raw_affiliation_strings":["University of Tokyo,Tokyo,Japan,113-8656"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Tokyo,Tokyo,Japan,113-8656","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045607922","display_name":"Kazutoshi Tanaka","orcid":"https://orcid.org/0000-0003-0880-9333"},"institutions":[{"id":"https://openalex.org/I146230289","display_name":"Omron (Japan)","ror":"https://ror.org/00q0w1h45","country_code":"JP","type":"company","lineage":["https://openalex.org/I146230289"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazutoshi Tanaka","raw_affiliation_strings":["OMRON SINIC X Corporation,Tokyo,Japan,113-0033"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OMRON SINIC X Corporation,Tokyo,Japan,113-0033","institution_ids":["https://openalex.org/I146230289"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077707500","display_name":"Yoshitaka Ushiku","orcid":"https://orcid.org/0000-0002-9014-1389"},"institutions":[{"id":"https://openalex.org/I146230289","display_name":"Omron (Japan)","ror":"https://ror.org/00q0w1h45","country_code":"JP","type":"company","lineage":["https://openalex.org/I146230289"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshitaka Ushiku","raw_affiliation_strings":["OMRON SINIC X Corporation,Tokyo,Japan,113-0033"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OMRON SINIC X Corporation,Tokyo,Japan,113-0033","institution_ids":["https://openalex.org/I146230289"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001224773","display_name":"Shinsuke Mori","orcid":"https://orcid.org/0000-0001-8596-8667"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinsuke Mori","raw_affiliation_strings":["University of Tokyo,Tokyo,Japan,113-8656"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Tokyo,Tokyo,Japan,113-8656","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.6555,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.98201062,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2051","last_page":"2058"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpreter","display_name":"Interpreter","score":0.8082436323165894},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7541230916976929},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6285470128059387},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5622851848602295},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.5251728892326355},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5146251916885376},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37933549284935},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2362481951713562},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1274133324623108}],"concepts":[{"id":"https://openalex.org/C122783720","wikidata":"https://www.wikidata.org/wiki/Q183065","display_name":"Interpreter","level":2,"score":0.8082436323165894},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7541230916976929},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6285470128059387},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5622851848602295},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5251728892326355},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5146251916885376},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37933549284935},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2362481951713562},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1274133324623108},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10611112","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611112","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1903836554","https://openalex.org/W2054497239","https://openalex.org/W2064675550","https://openalex.org/W2119709400","https://openalex.org/W2122054842","https://openalex.org/W2161414194","https://openalex.org/W2479423890","https://openalex.org/W2579549467","https://openalex.org/W2607750299","https://openalex.org/W2615497679","https://openalex.org/W2950635152","https://openalex.org/W2962716343","https://openalex.org/W2963037989","https://openalex.org/W2963847595","https://openalex.org/W2967180672","https://openalex.org/W2980400095","https://openalex.org/W3001865277","https://openalex.org/W3003205975","https://openalex.org/W3034383590","https://openalex.org/W3097712622","https://openalex.org/W3098581361","https://openalex.org/W3150384095","https://openalex.org/W3173859428","https://openalex.org/W3201868981","https://openalex.org/W3202187802","https://openalex.org/W4205130889","https://openalex.org/W4224912544","https://openalex.org/W4288956373","https://openalex.org/W4312936847","https://openalex.org/W4320559489","https://openalex.org/W4322718191","https://openalex.org/W4366999541","https://openalex.org/W4383097638","https://openalex.org/W4383108457","https://openalex.org/W4385473486","https://openalex.org/W4388660746","https://openalex.org/W4404612908","https://openalex.org/W6602842217","https://openalex.org/W6774041431","https://openalex.org/W6778883912","https://openalex.org/W6809509765","https://openalex.org/W6809646742","https://openalex.org/W6810640255","https://openalex.org/W6837989031","https://openalex.org/W6838865847","https://openalex.org/W6849177959","https://openalex.org/W6850072970","https://openalex.org/W6850625674","https://openalex.org/W6850787431","https://openalex.org/W6852136651","https://openalex.org/W6852800892","https://openalex.org/W6854738657"],"related_works":["https://openalex.org/W1721418433","https://openalex.org/W2463512549","https://openalex.org/W2246915267","https://openalex.org/W2004305899","https://openalex.org/W1844878253","https://openalex.org/W2611574984","https://openalex.org/W3089412887","https://openalex.org/W2548985027","https://openalex.org/W3213722473","https://openalex.org/W3196817267"],"abstract_inverted_index":{"Large":[0],"language":[1,60],"models":[2],"(LLMs)":[3],"are":[4,175],"accelerating":[5],"the":[6,16,50,102,110,117,133],"development":[7],"of":[8,18],"language-guided":[9,72],"robot":[10,122],"planners.":[11],"Meanwhile,":[12],"symbolic":[13,68,103,118],"planners":[14,51,69],"offer":[15],"advantage":[17],"interpretability.":[19],"This":[20],"paper":[21],"proposes":[22],"a":[23,41,45,54,71,76,80,129],"new":[24,81,145],"task":[25],"that":[26,83,151],"bridges":[27],"these":[28],"two":[29],"trends,":[30],"namely,":[31],"multimodal":[32],"planning":[33],"problem":[34,42,134],"specification.":[35],"The":[36,139],"aim":[37,106],"is":[38,107,141],"to":[39,52,108],"generate":[40,120,154],"description":[43,135],"(PD),":[44],"machine-readable":[46],"file":[47],"used":[48],"by":[49],"find":[53],"plan.":[55],"By":[56],"generating":[57],"PDs":[58,85,96],"from":[59,101],"instruction":[61],"and":[62,89,116,163,173],"scene":[63],"observation,":[64],"we":[65,127],"can":[66,93,114,153],"drive":[67],"in":[70],"framework.":[73],"We":[74],"propose":[75],"Vision-Language":[77],"Interpreter":[78],"(ViLaIn),":[79],"framework":[82,140],"generates":[84],"using":[86],"state-of-the-art":[87],"LLM":[88],"vision-language":[90],"models.":[91],"ViLaIn":[92,115,152],"refine":[94],"generated":[95],"via":[97],"error":[98],"message":[99],"feedback":[100],"planner.":[104],"Our":[105,171],"answer":[109],"question:":[111],"How":[112],"accurately":[113],"planner":[119],"valid":[121,164],"plans?":[123],"To":[124],"evaluate":[125],"ViLaIn,":[126],"introduce":[128],"novel":[130],"dataset":[131,174],"called":[132],"generation":[136],"(ProDG)":[137],"dataset.":[138],"evaluated":[142],"with":[143,158,166],"four":[144],"evaluation":[146],"metrics.":[147],"Experimental":[148],"results":[149],"show":[150],"syntactically":[155],"correct":[156],"problems":[157],"more":[159,167],"than":[160,168],"99%":[161],"accuracy":[162],"plans":[165],"58%":[169],"accuracy.":[170],"code":[172],"available":[176],"at":[177],"https://github.com/omron-sinicx/ViLaIn.":[178]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":6}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
