{"id":"https://openalex.org/W7125922161","doi":"https://doi.org/10.1109/smc58881.2025.11343173","title":"QwenGrasp: Human-Robot Interactive 6-DoF Target-Oriented Grasping with Large Vision-Language Model","display_name":"QwenGrasp: Human-Robot Interactive 6-DoF Target-Oriented Grasping with Large Vision-Language Model","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125922161","doi":"https://doi.org/10.1109/smc58881.2025.11343173"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11343173","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11343173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100443902","display_name":"Xinyu Chen","orcid":"https://orcid.org/0000-0002-9507-5835"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinyu Chen","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124074544","display_name":"Jian Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Yang","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124101977","display_name":"Qi Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Zhao","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013502815","display_name":"Zonghan He","orcid":"https://orcid.org/0009-0001-3327-1478"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zonghan He","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079522581","display_name":"Haobin Yang","orcid":"https://orcid.org/0000-0002-9184-4473"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haobin Yang","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123390142","display_name":"Yuhui Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhui Shi","raw_affiliation_strings":["Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China,518055","institution_ids":["https://openalex.org/I3045169105"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100443902"],"corresponding_institution_ids":["https://openalex.org/I3045169105"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.70217059,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7723","last_page":"7730"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9692000150680542,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9692000150680542,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.007199999876320362,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.0031999999191612005,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.8702999949455261},{"id":"https://openalex.org/keywords/workspace","display_name":"Workspace","score":0.6960999965667725},{"id":"https://openalex.org/keywords/robotic-hand","display_name":"Robotic hand","score":0.44999998807907104},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4366999864578247},{"id":"https://openalex.org/keywords/grippers","display_name":"Grippers","score":0.4219000041484833},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.3188999891281128}],"concepts":[{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.8702999949455261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8169999718666077},{"id":"https://openalex.org/C58581272","wikidata":"https://www.wikidata.org/wiki/Q12741163","display_name":"Workspace","level":3,"score":0.6960999965667725},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6381000280380249},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5199999809265137},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.48339998722076416},{"id":"https://openalex.org/C2988191880","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic hand","level":3,"score":0.44999998807907104},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4366999864578247},{"id":"https://openalex.org/C2775960376","wikidata":"https://www.wikidata.org/wiki/Q1435859","display_name":"Grippers","level":2,"score":0.4219000041484833},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2930000126361847},{"id":"https://openalex.org/C150415221","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic arm","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C17511633","wikidata":"https://www.wikidata.org/wiki/Q830694","display_name":"SMT placement equipment","level":3,"score":0.26179999113082886},{"id":"https://openalex.org/C2779038628","wikidata":"https://www.wikidata.org/wiki/Q7248497","display_name":"Programming by demonstration","level":3,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11343173","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11343173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6470156311988831,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2551182329","https://openalex.org/W2560609797","https://openalex.org/W2889969363","https://openalex.org/W2962837436","https://openalex.org/W2966615203","https://openalex.org/W2969014098","https://openalex.org/W2986303149","https://openalex.org/W3004047800","https://openalex.org/W3035198432","https://openalex.org/W3090814639","https://openalex.org/W3132114951","https://openalex.org/W3176770340","https://openalex.org/W3204876017","https://openalex.org/W3207187156","https://openalex.org/W3207888596","https://openalex.org/W3210327237","https://openalex.org/W3217655018","https://openalex.org/W4281730135","https://openalex.org/W4383108836","https://openalex.org/W4383108845","https://openalex.org/W4383109104","https://openalex.org/W4387092545","https://openalex.org/W4389611248","https://openalex.org/W4389667112","https://openalex.org/W4403677113","https://openalex.org/W4407468102"],"related_works":[],"abstract_inverted_index":{"Human-robot":[0],"interactive":[1],"target-oriented":[2,132],"grasping":[3,39,71,126],"in":[4,134],"unstructured":[5],"environments,":[6],"guided":[7],"by":[8],"natural":[9],"language,":[10],"is":[11],"crucial":[12],"for":[13,29],"enabling":[14],"intelligent":[15],"robotic":[16,30],"arms":[17,31],"to":[18,32,56,67,90,119,172],"perform":[19,68],"tasks":[20],"safely":[21],"and":[22,36,95,123,137,153,174],"efficiently.":[23],"However,":[24],"it":[25,129],"remains":[26],"a":[27,47,52],"challenge":[28],"comprehend":[33],"human":[34,121],"instructions":[35],"execute":[37,124],"corresponding":[38],"actions.":[40,127],"In":[41],"this":[42],"paper,":[43],"we":[44,78],"propose":[45],"QwenGrasp,":[46],"novel":[48],"system":[49],"that":[50],"uses":[51],"large":[53],"vision-language":[54],"model":[55],"align":[57],"workspace":[58],"images":[59],"with":[60,168],"textual":[61],"instructions.":[62],"This":[63],"alignment":[64],"enables":[65],"QwenGrasp":[66,104,150],"accurate":[69],"6DoF":[70],"on":[72],"the":[73,88,155,163],"specified":[74],"target":[75],"object.":[76,157],"Additionally,":[77],"introduce":[79],"Masked":[80],"REGNet,":[81],"which":[82],"incorporates":[83],"target-object":[84],"location":[85],"information":[86],"into":[87],"network":[89],"generate":[91],"precise":[92,125],"grasp":[93,98],"poses":[94],"ensure":[96],"high":[97],"quality.":[99],"Through":[100],"extensive":[101],"real-world":[102],"experiments,":[103],"achieves":[105],"over":[106],"90%":[107],"success":[108],"across":[109],"six":[110],"diverse":[111],"instruction":[112,138],"types.":[113],"The":[114],"results":[115],"highlight":[116],"QwenGrasp\u2019s":[117],"ability":[118],"understand":[120],"intent":[122],"Notably,":[128],"outperforms":[130],"other":[131],"methods":[133],"both":[135],"performance":[136],"comprehension.":[139],"Even":[140],"when":[141],"given":[142],"vague":[143],"descriptions,":[144],"directional":[145],"cues,":[146],"or":[147],"complex":[148],"instructions,":[149],"reliably":[151],"identifies":[152],"grasps":[154],"correct":[156],"An":[158],"ablation":[159],"study":[160],"further":[161],"confirms":[162],"importance":[164],"of":[165],"each":[166],"component,":[167],"all":[169],"contributing":[170],"significantly":[171],"robust":[173],"high-quality":[175],"grasping.":[176]},"counts_by_year":[],"updated_date":"2026-01-29T23:17:01.242718","created_date":"2026-01-29T00:00:00"}
