{"id":"https://openalex.org/W4408323433","doi":"https://doi.org/10.1109/tase.2025.3550360","title":"Integrating With Multimodal Information for Enhancing Robotic Grasping With Vision-Language Models","display_name":"Integrating With Multimodal Information for Enhancing Robotic Grasping With Vision-Language Models","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4408323433","doi":"https://doi.org/10.1109/tase.2025.3550360"},"language":"en","primary_location":{"id":"doi:10.1109/tase.2025.3550360","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2025.3550360","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019182690","display_name":"Zhou Zhao","orcid":"https://orcid.org/0000-0002-6547-646X"},"institutions":[{"id":"https://openalex.org/I40963666","display_name":"Central China Normal University","ror":"https://ror.org/03x1jna21","country_code":"CN","type":"education","lineage":["https://openalex.org/I40963666"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhou Zhao","raw_affiliation_strings":["School of Computer Science, Central China Normal University, Wuhan, China","School of Computer Science, Central China Normal University and the Hubei Engineering Research Center for Intelligent Detection and Identification of Complex Parts, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Central China Normal University, Wuhan, China","institution_ids":["https://openalex.org/I40963666"]},{"raw_affiliation_string":"School of Computer Science, Central China Normal University and the Hubei Engineering Research Center for Intelligent Detection and Identification of Complex Parts, Wuhan, China","institution_ids":["https://openalex.org/I40963666"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111280200","display_name":"Dongyuan Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I40963666","display_name":"Central China Normal University","ror":"https://ror.org/03x1jna21","country_code":"CN","type":"education","lineage":["https://openalex.org/I40963666"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongyuan Zheng","raw_affiliation_strings":["School of Computer Science, Central China Normal University, Wuhan, China","School of Computer Science, Central China Normal University and the Hubei Engineering Research Center for Intelligent Detection and Identification of Complex Parts, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Central China Normal University, Wuhan, China","institution_ids":["https://openalex.org/I40963666"]},{"raw_affiliation_string":"School of Computer Science, Central China Normal University and the Hubei Engineering Research Center for Intelligent Detection and Identification of Complex Parts, Wuhan, China","institution_ids":["https://openalex.org/I40963666"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043612201","display_name":"Yizi Chen","orcid":"https://orcid.org/0000-0003-1637-0092"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Yizi Chen","raw_affiliation_strings":["Institute of Cartography and Geoinformation, ETH Z&#x00FC;rich, Z&#x00FC;rich, Switzerland","Institute of Cartography and Geoinformation, ETH Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Institute of Cartography and Geoinformation, ETH Z&#x00FC;rich, Z&#x00FC;rich, Switzerland","institution_ids":[]},{"raw_affiliation_string":"Institute of Cartography and Geoinformation, ETH Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116252143","display_name":"Jing Luo","orcid":"https://orcid.org/0000-0002-9480-5875"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Luo","raw_affiliation_strings":["School of Automation, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327175","display_name":"Yanjun Wang","orcid":"https://orcid.org/0000-0002-1336-7464"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanjun Wang","raw_affiliation_strings":["Institute of Marine Equipment, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Institute of Marine Equipment, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091127758","display_name":"Panfeng Huang","orcid":"https://orcid.org/0000-0002-5132-9602"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Panfeng Huang","raw_affiliation_strings":["National Key Laboratory of Aerospace Flight Dynamics and the Research Center for Intelligent Robotics, School of Astronautics, Northwestern Polytechnical University, Xi&#x2019;an, China","School of Astronautics, National Key Laboratory of Aerospace Flight Dynamics and Research Center for Intelligent Robotics, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Aerospace Flight Dynamics and the Research Center for Intelligent Robotics, School of Astronautics, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"School of Astronautics, National Key Laboratory of Aerospace Flight Dynamics and Research Center for Intelligent Robotics, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019906827","display_name":"Chenguang Yang","orcid":"https://orcid.org/0000-0001-5255-5559"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chenguang Yang","raw_affiliation_strings":["Department of Computer Science, University of Liverpool, Liverpool, U.K","Department of Computer Science, University of Liverpool, Liverpool, UK"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Liverpool, Liverpool, U.K","institution_ids":["https://openalex.org/I146655781"]},{"raw_affiliation_string":"Department of Computer Science, University of Liverpool, Liverpool, UK","institution_ids":["https://openalex.org/I146655781"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5019182690"],"corresponding_institution_ids":["https://openalex.org/I40963666"],"apc_list":null,"apc_paid":null,"fwci":8.1318,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.9746559,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"22","issue":null,"first_page":"13073","last_page":"13086"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.953499972820282,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.9211000204086304,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5937880277633667},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.5799773335456848},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5467303395271301},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5392706394195557},{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.46432653069496155},{"id":"https://openalex.org/keywords/machine-vision","display_name":"Machine vision","score":0.4639904499053955},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.4417484998703003},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.43923237919807434},{"id":"https://openalex.org/keywords/grippers","display_name":"Grippers","score":0.4299126863479614},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.36979347467422485}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5937880277633667},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5799773335456848},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5467303395271301},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5392706394195557},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.46432653069496155},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.4639904499053955},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.4417484998703003},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43923237919807434},{"id":"https://openalex.org/C2775960376","wikidata":"https://www.wikidata.org/wiki/Q1435859","display_name":"Grippers","level":2,"score":0.4299126863479614},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.36979347467422485},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tase.2025.3550360","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2025.3550360","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5103456268","display_name":null,"funder_award_id":"2024AFB245","funder_id":"https://openalex.org/F4320322186","funder_display_name":"Natural Science Foundation of Hubei Province"},{"id":"https://openalex.org/G5845721495","display_name":null,"funder_award_id":"2024AFB245","funder_id":"https://openalex.org/F4320317776","funder_display_name":"Hebei Provincial\u00a0Postdoctoral Science\u00a0Foundation"},{"id":"https://openalex.org/G6006398987","display_name":null,"funder_award_id":"2024AFB614","funder_id":"https://openalex.org/F4320317776","funder_display_name":"Hebei Provincial\u00a0Postdoctoral Science\u00a0Foundation"},{"id":"https://openalex.org/G8691886419","display_name":null,"funder_award_id":"62203341","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320317776","display_name":"Hebei Provincial\u00a0Postdoctoral Science\u00a0Foundation","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322186","display_name":"Natural Science Foundation of Hubei Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1999156278","https://openalex.org/W2109163007","https://openalex.org/W2194775991","https://openalex.org/W2229480318","https://openalex.org/W2824754393","https://openalex.org/W2842511635","https://openalex.org/W2910077286","https://openalex.org/W2953669419","https://openalex.org/W2962716343","https://openalex.org/W2962784628","https://openalex.org/W2962875890","https://openalex.org/W2963033241","https://openalex.org/W2963244312","https://openalex.org/W2967153639","https://openalex.org/W2980216391","https://openalex.org/W3006932462","https://openalex.org/W3035198432","https://openalex.org/W3130885760","https://openalex.org/W3186077919","https://openalex.org/W3189615635","https://openalex.org/W3205420310","https://openalex.org/W3207908209","https://openalex.org/W4210316419","https://openalex.org/W4285102324","https://openalex.org/W4291365909","https://openalex.org/W4296817212","https://openalex.org/W4312324773","https://openalex.org/W4360897867","https://openalex.org/W4361858295","https://openalex.org/W4362654503","https://openalex.org/W4367146739","https://openalex.org/W4367282134","https://openalex.org/W4367834905","https://openalex.org/W4383108836","https://openalex.org/W4383109368","https://openalex.org/W4383337613","https://openalex.org/W4385245566","https://openalex.org/W4385431115","https://openalex.org/W4386083025","https://openalex.org/W4388430610","https://openalex.org/W4389667691","https://openalex.org/W4390874575","https://openalex.org/W4391305718","https://openalex.org/W4391367885","https://openalex.org/W4391695396","https://openalex.org/W4399563339","https://openalex.org/W4401163449","https://openalex.org/W4401413802","https://openalex.org/W4401414646","https://openalex.org/W4401415287","https://openalex.org/W4401870936","https://openalex.org/W4402702930","https://openalex.org/W4405785249","https://openalex.org/W4405910844","https://openalex.org/W6785726837","https://openalex.org/W6791353385","https://openalex.org/W6850503672","https://openalex.org/W6852884379","https://openalex.org/W6858561505","https://openalex.org/W6870201057","https://openalex.org/W6875806066"],"related_works":["https://openalex.org/W2258669850","https://openalex.org/W1513401665","https://openalex.org/W4283263216","https://openalex.org/W3160851599","https://openalex.org/W1640559846","https://openalex.org/W4287179229","https://openalex.org/W3205513966","https://openalex.org/W3120459843","https://openalex.org/W4366547574","https://openalex.org/W3200191727"],"abstract_inverted_index":{"As":[0],"robots":[1],"grow":[2],"increasingly":[3,241],"intelligent":[4],"and":[5,51,58,94,147,155,182,186,211,218,255,279,351],"utilize":[6],"data":[7,15,27,42,149],"from":[8,119],"various":[9],"sensors,":[10],"relying":[11],"solely":[12],"on":[13,106,344],"unimodal":[14,271],"sources":[16,251],"is":[17,306],"becoming":[18],"inadequate":[19],"for":[20,74,184,216,302,348,376],"their":[21,120],"operational":[22],"needs.":[23],"Consequently,":[24],"integrating":[25,70],"multimodal":[26],"has":[28,239],"emerged":[29],"as":[30,285],"a":[31,45,67,246],"critical":[32],"area":[33],"of":[34,40,135,180,209,230,252,269],"focus.":[35],"However,":[36],"the":[37,116,123,132,165,196,228,267,346,374],"effective":[38,154],"combination":[39],"different":[41],"modalities":[43],"poses":[44],"considerable":[46],"challenge,":[47],"especially":[48],"in":[49,122,189,236,281,296,314,327,382],"complex":[50,237],"dynamic":[52,282],"settings":[53],"where":[54,304],"accurate":[55,276],"object":[56,103,277],"recognition":[57,278],"manipulation":[59,280],"are":[60,111,318],"essential.":[61],"In":[62],"this":[63,107,367],"paper,":[64],"we":[65,126,363],"introduce":[66],"novel":[68],"framework":[69,88],"with":[71,77,275],"Multimodal":[72],"Information":[73],"Grasping":[75],"Synthesis":[76],"vision-language":[78],"models":[79],"(MIG)":[80],"designed":[81],"to":[82,114,130,195,223,233,320,332,356,365],"improve":[83],"robotic":[84,158,225,259,380],"grasping":[85,143,260],"capabilities.":[86,261],"This":[87,243],"incorporates":[89],"visual":[90,133,146],"data,":[91],"textual":[92,148],"information,":[93],"human-derived":[95],"prior":[96,108],"knowledge.":[97],"We":[98,160,168],"start":[99],"by":[100],"creating":[101],"target":[102,117],"masks":[104],"based":[105],"knowledge,":[109],"which":[110],"then":[112],"used":[113],"segregate":[115],"objects":[118],"surroundings":[121],"image.":[124],"Subsequently,":[125],"employ":[127],"language":[128],"cues":[129],"refine":[131],"representations":[134],"these":[136,361],"objects.":[137],"Finally,":[138],"our":[139,171,200,309],"system":[140,347],"executes":[141],"precise":[142],"actions":[144],"using":[145,164],"synthesis,":[150],"thus":[151],"facilitating":[152],"more":[153,204,369,378],"contextually":[156],"aware":[157],"grasping.":[159],"carry":[161],"out":[162],"experiments":[163],"OCID-VLG":[166],"dataset.":[167],"observe":[169],"that":[170,248,273],"methodology":[172],"surpasses":[173],"current":[174,270],"state-of-the-art":[175],"(SOTA)":[176],"techniques,":[177],"delivering":[178],"improvements":[179,295],"9.91%":[181],"5.70%":[183],"top-1":[185,217],"top-5":[187,219],"predictions":[188],"grasp":[190,297],"accuracy.":[191],"Moreover,":[192],"when":[193],"apply":[194],"reconstructed":[197],"Grasp-MultiObject":[198],"dataset,":[199],"approach":[201,368],"demonstrates":[202,293],"even":[203],"substantial":[205],"enhancements,":[206],"achieving":[207],"gains":[208],"17.63%":[210],"22.76%":[212],"over":[213],"SOTA":[214],"methods":[215],"predictions,":[220],"respectively.":[221],"Note":[222],"Practitioners\u2014As":[224],"systems":[226,272],"evolve,":[227],"challenge":[229],"enabling":[231],"them":[232],"function":[234],"effectively":[235],"environments":[238,330],"become":[240],"apparent.":[242],"paper":[244],"introduces":[245],"solution":[247],"integrates":[249],"multiple":[250],"data\u2014visual,":[253],"textual,":[254],"human":[256],"knowledge\u2014to":[257],"enhance":[258,357],"The":[262,322],"practical":[263],"problems":[264],"addressed":[265],"include":[266],"limitations":[268,319],"struggle":[274],"settings,":[283],"such":[284],"warehouses":[286],"or":[287,336],"assembly":[288],"lines.":[289],"Our":[290],"framework,":[291],"MIG,":[292],"significant":[294],"accuracy,":[298],"making":[299],"it":[300],"suitable":[301],"tasks":[303],"precision":[305],"critical.":[307],"While":[308],"results":[310],"show":[311],"promise,":[312],"particularly":[313],"controlled":[315],"experiments,":[316],"there":[317],"consider.":[321],"framework\u2019s":[323],"performance":[324],"may":[325],"vary":[326],"unstructured":[328],"real-world":[329],"due":[331],"factors":[333],"like":[334],"occlusion":[335],"varying":[337],"lighting":[338],"conditions.":[339],"Future":[340],"work":[341],"should":[342],"focus":[343],"refining":[345],"real-time":[349],"application":[350],"exploring":[352],"additional":[353],"sensory":[354],"inputs":[355],"robustness.":[358],"By":[359],"addressing":[360],"challenges,":[362],"aim":[364],"make":[366],"applicable":[370],"across":[371],"industries,":[372],"paving":[373],"way":[375],"smarter,":[377],"adaptable":[379],"solutions":[381],"everyday":[383],"tasks.":[384]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
