{"id":"https://openalex.org/W4405709612","doi":"https://doi.org/10.1109/iscslp63861.2024.10800613","title":"Empowering Robots with Multimodal Language Models for Task Planning with Interaction","display_name":"Empowering Robots with Multimodal Language Models for Task Planning with Interaction","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4405709612","doi":"https://doi.org/10.1109/iscslp63861.2024.10800613"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp63861.2024.10800613","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112362321","display_name":"Tong Lee Chung","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135242","display_name":"Omnitech Robotics (United States)","ror":"https://ror.org/03dhfj813","country_code":"US","type":"company","lineage":["https://openalex.org/I4210135242"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tong Lee Chung","raw_affiliation_strings":["Research Institute of UBTech Robotics"],"affiliations":[{"raw_affiliation_string":"Research Institute of UBTech Robotics","institution_ids":["https://openalex.org/I4210135242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040709029","display_name":"Jianxin Pang","orcid":"https://orcid.org/0000-0002-3985-5802"},"institutions":[{"id":"https://openalex.org/I4210135242","display_name":"Omnitech Robotics (United States)","ror":"https://ror.org/03dhfj813","country_code":"US","type":"company","lineage":["https://openalex.org/I4210135242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jianxin Pang","raw_affiliation_strings":["Research Institute of UBTech Robotics"],"affiliations":[{"raw_affiliation_string":"Research Institute of UBTech Robotics","institution_ids":["https://openalex.org/I4210135242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101705358","display_name":"Jun Cheng","orcid":"https://orcid.org/0000-0002-3131-3275"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Cheng","raw_affiliation_strings":["Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210145761","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112362321"],"corresponding_institution_ids":["https://openalex.org/I4210135242"],"apc_list":null,"apc_paid":null,"fwci":0.6762,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.77226702,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"358","last_page":"362"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9589999914169312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9589999914169312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9527999758720398,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.9380999803543091,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7607907056808472},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6607273817062378},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6587048172950745},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.6478606462478638},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.5638297200202942},{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.423613578081131},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.4121696650981903},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3740144968032837},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3546517491340637},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14101895689964294},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.13802701234817505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7607907056808472},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6607273817062378},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6587048172950745},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.6478606462478638},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.5638297200202942},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.423613578081131},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.4121696650981903},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3740144968032837},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3546517491340637},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14101895689964294},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.13802701234817505}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp63861.2024.10800613","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W2191285365","https://openalex.org/W2740919303","https://openalex.org/W3034758614","https://openalex.org/W3155584966","https://openalex.org/W4211164456","https://openalex.org/W4307680525","https://openalex.org/W4378746725","https://openalex.org/W4385245566","https://openalex.org/W4385571710","https://openalex.org/W4386076314","https://openalex.org/W4386083015","https://openalex.org/W4388182168","https://openalex.org/W4389524458","https://openalex.org/W4389665836","https://openalex.org/W4390871755","https://openalex.org/W4390874280","https://openalex.org/W4391417118","https://openalex.org/W4394828156","https://openalex.org/W4403307161","https://openalex.org/W6635590879","https://openalex.org/W6790356757","https://openalex.org/W6791353385","https://openalex.org/W6802366246","https://openalex.org/W6810640255","https://openalex.org/W6811129797","https://openalex.org/W6850503672","https://openalex.org/W6851592950","https://openalex.org/W6852781825","https://openalex.org/W6856800273","https://openalex.org/W6858103023","https://openalex.org/W6859532197"],"related_works":["https://openalex.org/W3213722473","https://openalex.org/W2110944602","https://openalex.org/W4287179229","https://openalex.org/W3205513966","https://openalex.org/W3120459843","https://openalex.org/W4366547574","https://openalex.org/W3200191727","https://openalex.org/W1564680838","https://openalex.org/W4281567410","https://openalex.org/W2003125260"],"abstract_inverted_index":{"This":[0],"paper":[1],"introduces":[2],"a":[3,17,50,52,55],"multimodal":[4,74,81,122],"language":[5],"model":[6],"based":[7],"agent":[8,47],"system":[9,26,48,97],"for":[10,38,105,147],"task":[11,56,94,136],"planning":[12],"with":[13,70],"interaction":[14,72,82],"integrated":[15],"into":[16],"physical":[18],"robot":[19],"to":[20,119],"facilitate":[21],"enhanced":[22],"human-robot":[23],"interactions.":[24,45],"The":[25,46,116],"architecture":[27,100],"integrates":[28],"textual":[29],"and":[30,43,54,61,73,89,114,135,149],"visual":[31],"inputs":[32,123],"directly":[33],"within":[34],"the":[35,99,111],"system,":[36],"allowing":[37],"seamless":[39],"transitions":[40],"between":[41],"conversational":[42],"task-oriented":[44],"comprises":[49],"router,":[51],"chatbot,":[53],"planner":[57],"enabling":[58],"efficient":[59],"decision-making":[60],"flexibility":[62],"in":[63,93,144],"real-world":[64],"applications.":[65],"We":[66],"conduct":[67],"in-depth":[68],"study":[69],"text-only":[71],"interaction,":[75],"our":[76],"extensive":[77],"experiments":[78],"find":[79],"that":[80],"receives":[83],"higher":[84],"score":[85],"by":[86,101,130],"human":[87],"evaluation":[88],"improves":[90],"success":[91],"rates":[92],"planning.":[95],"Our":[96],"simplifies":[98],"incorporating":[102],"multiple":[103],"agents":[104],"specific":[106],"tasks":[107],"while":[108],"improving":[109],"both":[110],"system's":[112],"efficiency":[113],"cost-effectiveness.":[115],"robot's":[117],"ability":[118],"interact":[120],"using":[121],"significantly":[124],"enhances":[125],"user":[126],"experience,":[127],"as":[128],"evidenced":[129],"evaluations":[131],"measuring":[132],"friendliness,":[133],"usefulness,":[134],"completeness.":[137],"Practical":[138],"implications":[139],"are":[140],"vast,":[141],"promising":[142],"advancements":[143],"robotics":[145],"applications":[146],"personal":[148],"professional":[150],"environments.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
