{"id":"https://openalex.org/W4416750267","doi":"https://doi.org/10.1109/iros60139.2025.11246314","title":"Vision-Language Guided Adaptive Robot Action Planning: Responding to Intermediate Results and Implicit Human Intentions","display_name":"Vision-Language Guided Adaptive Robot Action Planning: Responding to Intermediate Results and Implicit Human Intentions","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416750267","doi":"https://doi.org/10.1109/iros60139.2025.11246314"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246314","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246314","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088555680","display_name":"Weihao Cai","orcid":"https://orcid.org/0000-0002-3233-027X"},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Weihao Cai","raw_affiliation_strings":["Ritsumeikan University,College of Information Science and Engineering,Osaka,Japan"],"affiliations":[{"raw_affiliation_string":"Ritsumeikan University,College of Information Science and Engineering,Osaka,Japan","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036073690","display_name":"Yoshiki Mori","orcid":"https://orcid.org/0000-0002-6223-8634"},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"Osaka University","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshiki Mori","raw_affiliation_strings":["The University of Osaka,Graduate School of Engineering Science,Osaka,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Osaka,Graduate School of Engineering Science,Osaka,Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041905353","display_name":"Nobutaka Shimada","orcid":null},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nobutaka Shimada","raw_affiliation_strings":["Ritsumeikan University,College of Information Science and Engineering,Osaka,Japan"],"affiliations":[{"raw_affiliation_string":"Ritsumeikan University,College of Information Science and Engineering,Osaka,Japan","institution_ids":["https://openalex.org/I135768898"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5088555680"],"corresponding_institution_ids":["https://openalex.org/I135768898"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40272214,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"329","last_page":"334"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5404000282287598,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5404000282287598,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.15729999542236328,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.10119999945163727,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.7502999901771545},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7354999780654907},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.7312999963760376},{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.4781999886035919},{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.42179998755455017},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.41909998655319214},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.3961000144481659}],"concepts":[{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.7502999901771545},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7354999780654907},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.7312999963760376},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7211999893188477},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5982000231742859},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5899999737739563},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.4781999886035919},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.42179998755455017},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.41909998655319214},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3961000144481659},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.33820000290870667},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3312000036239624},{"id":"https://openalex.org/C74222875","wikidata":"https://www.wikidata.org/wiki/Q16000312","display_name":"Robot kinematics","level":4,"score":0.29190000891685486},{"id":"https://openalex.org/C162947575","wikidata":"https://www.wikidata.org/wiki/Q2005645","display_name":"Social robot","level":5,"score":0.29010000824928284},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27320000529289246},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.26919999718666077},{"id":"https://openalex.org/C65401140","wikidata":"https://www.wikidata.org/wiki/Q7353385","display_name":"Robot control","level":4,"score":0.2667999863624573},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.26269999146461487},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246314","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246314","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1520597402","https://openalex.org/W2498525044","https://openalex.org/W2963634205","https://openalex.org/W2976205474","https://openalex.org/W2994446013","https://openalex.org/W4283766490","https://openalex.org/W4386076325","https://openalex.org/W4386320380","https://openalex.org/W4389520252","https://openalex.org/W4389523832","https://openalex.org/W4403296141","https://openalex.org/W4412886751"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,126],"research":[3],"have":[4],"demonstrated":[5],"that":[6,24,124],"Vision-Language":[7],"Models":[8],"(VLMs)":[9],"are":[10],"a":[11,21,87,117],"promising":[12],"technology":[13,115],"for":[14,36,110],"robot":[15,33,76,121],"task":[16],"planning.":[17],"This":[18],"paper":[19],"presents":[20],"novel":[22],"approach":[23,119],"leverages":[25],"visual":[26],"prompts":[27,106],"and":[28,61,63,129],"VLMs":[29],"to":[30,73,79,89,120],"generate":[31,74],"feasible":[32],"action":[34,77,97,122],"sequences":[35,78,92],"achieving":[37],"shared":[38],"tasks":[39],"through":[40,93],"human-robot":[41],"collaboration":[42],"while":[43],"simultaneously":[44],"estimating":[45],"human":[46,66],"intentions.":[47],"Our":[48],"method":[49],"enhances":[50],"VLMs\u2019":[51],"understanding":[52],"of":[53],"the":[54,84],"environment":[55],"by":[56,104],"utilizing":[57],"annotations":[58],"(bounding":[59],"boxes":[60],"labels)":[62],"dynamically":[64],"infers":[65],"intentions":[67],"based":[68],"on":[69],"changing":[70],"environmental":[71],"conditions":[72],"optimal":[75],"achieve":[80],"common":[81],"goals.":[82],"Additionally,":[83],"system":[85],"incorporates":[86],"mechanism":[88],"regenerate":[90],"new":[91,118],"VLM":[94],"analysis":[95],"when":[96],"failures":[98],"or":[99],"external":[100],"interference":[101],"occur.":[102],"Furthermore,":[103],"designing":[105],"as":[107],"versatile":[108],"modules":[109],"diverse":[111],"tasks,":[112],"our":[113],"proposed":[114],"offers":[116],"planning":[123],"excels":[125],"both":[127],"efficiency":[128],"adaptability.":[130]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-28T00:00:00"}
