{"id":"https://openalex.org/W4405778995","doi":"https://doi.org/10.1109/iros58592.2024.10801833","title":"Vision-Language Model-based Physical Reasoning for Robot Liquid Perception","display_name":"Vision-Language Model-based Physical Reasoning for Robot Liquid Perception","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405778995","doi":"https://doi.org/10.1109/iros58592.2024.10801833"},"language":"en","primary_location":{"id":"doi:10.1109/iros58592.2024.10801833","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10801833","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112975499","display_name":"Wenqiang Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105595","display_name":"Institute of Art","ror":"https://ror.org/017fyx225","country_code":"PL","type":"facility","lineage":["https://openalex.org/I4210105595","https://openalex.org/I99542240"]}],"countries":["PL"],"is_corresponding":true,"raw_author_name":"Wenqiang Lai","raw_affiliation_strings":["Shenzhen Institute of Artificial Intelligence and Robotics for Society"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Artificial Intelligence and Robotics for Society","institution_ids":["https://openalex.org/I4210105595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101591104","display_name":"Tianwei Zhang","orcid":"https://orcid.org/0000-0002-1462-5402"},"institutions":[{"id":"https://openalex.org/I4210105595","display_name":"Institute of Art","ror":"https://ror.org/017fyx225","country_code":"PL","type":"facility","lineage":["https://openalex.org/I4210105595","https://openalex.org/I99542240"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Tianwei Zhang","raw_affiliation_strings":["Shenzhen Institute of Artificial Intelligence and Robotics for Society"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Artificial Intelligence and Robotics for Society","institution_ids":["https://openalex.org/I4210105595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076185690","display_name":"Tin Lun Lam","orcid":"https://orcid.org/0000-0002-6363-1446"},"institutions":[{"id":"https://openalex.org/I4210105595","display_name":"Institute of Art","ror":"https://ror.org/017fyx225","country_code":"PL","type":"facility","lineage":["https://openalex.org/I4210105595","https://openalex.org/I99542240"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Tin Lun Lam","raw_affiliation_strings":["Shenzhen Institute of Artificial Intelligence and Robotics for Society"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Artificial Intelligence and Robotics for Society","institution_ids":["https://openalex.org/I4210105595"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032152290","display_name":"Yuan Gao","orcid":"https://orcid.org/0000-0001-6326-129X"},"institutions":[{"id":"https://openalex.org/I4210105595","display_name":"Institute of Art","ror":"https://ror.org/017fyx225","country_code":"PL","type":"facility","lineage":["https://openalex.org/I4210105595","https://openalex.org/I99542240"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Yuan Gao","raw_affiliation_strings":["Shenzhen Institute of Artificial Intelligence and Robotics for Society"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Artificial Intelligence and Robotics for Society","institution_ids":["https://openalex.org/I4210105595"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5112975499"],"corresponding_institution_ids":["https://openalex.org/I4210105595"],"apc_list":null,"apc_paid":null,"fwci":1.0239,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.78936705,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"9652","last_page":"9659"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.7991999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.7991999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.7662000060081482,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.6978999972343445,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6745148301124573},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5923831462860107},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5805633664131165},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4467742443084717},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3808480203151703},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.344510018825531},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.23999881744384766}],"concepts":[{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6745148301124573},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5923831462860107},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5805633664131165},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4467742443084717},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3808480203151703},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.344510018825531},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.23999881744384766},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros58592.2024.10801833","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10801833","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2017801178","https://openalex.org/W2149606722","https://openalex.org/W2787041204","https://openalex.org/W2963305465","https://openalex.org/W2997636197","https://openalex.org/W2998617917","https://openalex.org/W3003375370","https://openalex.org/W3130633756","https://openalex.org/W4224912544","https://openalex.org/W4280585654","https://openalex.org/W4313026212","https://openalex.org/W4383108457","https://openalex.org/W4389520366","https://openalex.org/W4389665575","https://openalex.org/W4389667049","https://openalex.org/W4401417048","https://openalex.org/W4402727815","https://openalex.org/W6686008357","https://openalex.org/W6693682517","https://openalex.org/W6799626271","https://openalex.org/W6809509765","https://openalex.org/W6839928859","https://openalex.org/W6845994847","https://openalex.org/W6846254642"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"There":[0],"is":[1],"a":[2,54,79],"growing":[3],"interest":[4],"in":[5,11,30,47,201],"applying":[6],"large":[7,48],"language":[8,72,132],"models":[9,50],"(LLMs)":[10],"robotic":[12],"tasks,":[13],"due":[14],"to":[15,90,94,110,167],"their":[16,177],"remarkable":[17],"reasoning":[18,185],"ability":[19],"and":[20,131,151,175,189],"extensive":[21],"knowledge":[22],"learned":[23,192],"from":[24,219],"vast":[25],"training":[26,155],"corpora.":[27],"Grounding":[28],"LLMs":[29],"the":[31,59,85,105,112,165,170,187,202,217,222],"physical":[32,60,106,171,190],"world":[33,61],"remains":[34],"an":[35],"open":[36],"challenge":[37],"as":[38,135],"they":[39],"can":[40,163],"only":[41],"process":[42],"textual":[43],"input.":[44],"Recent":[45],"advancements":[46],"vision-language":[49],"(LVLMs)":[51],"have":[52],"enabled":[53],"more":[55],"comprehensive":[56],"understanding":[57,107],"of":[58,108,118,148,173,204],"by":[62,88,183,221],"incorporating":[63],"visual":[64,113,188,206],"input,":[65],"which":[66],"provides":[67],"richer":[68],"contextual":[69],"information":[70],"than":[71],"alone.":[73],"In":[74],"this":[75],"work,":[76],"we":[77,103,158],"proposed":[78],"novel":[80],"paradigm":[81],"that":[82,160,182],"leveraged":[83],"GPT-4V(ision),":[84],"state-of-the-art":[86],"LVLM":[87],"OpenAI,":[89],"enable":[91,164],"embodied":[92],"agents":[93],"perceive":[95,169],"liquid":[96,199],"objects":[97,200],"via":[98],"image-based":[99],"environmental":[100],"feedback.":[101],"Specifically,":[102],"exploited":[104],"GPT-4V":[109],"interpret":[111],"representation":[114],"(e.g.,":[115,121,208],"time-series":[116],"plot)":[117],"non-visual":[119],"feedback":[120],"F/T":[122],"sensor":[123],"data),":[124],"indirectly":[125,168],"enabling":[126],"multimodal":[127],"perception":[128],"beyond":[129],"vision":[130],"using":[133,141],"images":[134],"proxies.":[136],"We":[137,179],"evaluated":[138],"our":[139,161,195],"method":[140,162,196],"10":[142],"common":[143],"household":[144],"liquids":[145,174],"with":[146,211],"containers":[147],"various":[149],"geometry":[150],"material.":[152],"Without":[153],"any":[154],"or":[156,214],"fine-tuning,":[157],"demonstrated":[159],"robot":[166],"response":[172],"estimate":[176],"viscosity.":[178],"also":[180],"showed":[181],"jointly":[184],"over":[186],"attributes":[191],"through":[193],"interactions,":[194],"could":[197],"recognize":[198],"absence":[203],"strong":[205],"cues":[207],"container":[209],"labels":[210],"legible":[212],"text":[213],"symbols),":[215],"increasing":[216],"accuracy":[218],"69.0%\u2014achieved":[220],"best-performing":[223],"vision-only":[224],"variant\u2014to":[225],"86.0%.":[226]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
