{"id":"https://openalex.org/W4416749294","doi":"https://doi.org/10.1109/iros60139.2025.11246582","title":"GraspMAS: Zero-Shot Language-driven Grasp Detection with Multi-Agent System","display_name":"GraspMAS: Zero-Shot Language-driven Grasp Detection with Multi-Agent System","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749294","doi":"https://doi.org/10.1109/iros60139.2025.11246582"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246582","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246582","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101842285","display_name":"Quang Tung Nguyen","orcid":"https://orcid.org/0000-0002-0962-0332"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Quang Nguyen","raw_affiliation_strings":["FPT Software AI Center,Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT Software AI Center,Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113618808","display_name":"Tri Le","orcid":null},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Tri Le","raw_affiliation_strings":["FPT Software AI Center,Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT Software AI Center,Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052468815","display_name":"Huy A. Nguyen","orcid":"https://orcid.org/0000-0002-1227-6173"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Huy Nguyen","raw_affiliation_strings":["TU,Automation &#x0026; Control Institute (ACIN),Wien,Austria"],"affiliations":[{"raw_affiliation_string":"TU,Automation &#x0026; Control Institute (ACIN),Wien,Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043429366","display_name":"Thieu Vo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210126319","display_name":"Duke-NUS Medical School","ror":"https://ror.org/02j1m6098","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596","https://openalex.org/I170897317","https://openalex.org/I4210126319"]},{"id":"https://openalex.org/I4210145818","display_name":"Yale-NUS College","ror":null,"country_code":"SG","type":null,"lineage":["https://openalex.org/I4210145818"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Thieu Vo","raw_affiliation_strings":["NUS,Department of Mathematics,Singapore"],"affiliations":[{"raw_affiliation_string":"NUS,Department of Mathematics,Singapore","institution_ids":["https://openalex.org/I4210126319","https://openalex.org/I4210145818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079787626","display_name":"Tung D. Ta","orcid":"https://orcid.org/0000-0002-2342-1364"},"institutions":[{"id":"https://openalex.org/I130653702","display_name":"Creative Technology (Singapore)","ror":"https://ror.org/040pjdm93","country_code":"SG","type":"company","lineage":["https://openalex.org/I130653702"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tung D. Ta","raw_affiliation_strings":["University of Tokyo,Department of Creative Informatics,Japan"],"affiliations":[{"raw_affiliation_string":"University of Tokyo,Department of Creative Informatics,Japan","institution_ids":["https://openalex.org/I130653702"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045614046","display_name":"Baoru Huang","orcid":"https://orcid.org/0000-0002-4421-652X"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Baoru Huang","raw_affiliation_strings":["University of Liverpool,Department of Computer Science,UK"],"affiliations":[{"raw_affiliation_string":"University of Liverpool,Department of Computer Science,UK","institution_ids":["https://openalex.org/I146655781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063231826","display_name":"Minh N. Vu","orcid":"https://orcid.org/0000-0001-8727-0350"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Minh N. Vu","raw_affiliation_strings":["TU,Automation &#x0026; Control Institute (ACIN),Wien,Austria"],"affiliations":[{"raw_affiliation_string":"TU,Automation &#x0026; Control Institute (ACIN),Wien,Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101706488","display_name":"Anh Gia-Tuan Nguyen","orcid":"https://orcid.org/0000-0003-3606-4199"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Anh Nguyen","raw_affiliation_strings":["University of Liverpool,Department of Computer Science,UK"],"affiliations":[{"raw_affiliation_string":"University of Liverpool,Department of Computer Science,UK","institution_ids":["https://openalex.org/I146655781"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101842285"],"corresponding_institution_ids":["https://openalex.org/I109689652"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.48296264,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"14939","last_page":"14946"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.692300021648407,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.692300021648407,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09149999916553497,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.025299999862909317,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.928600013256073},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6330000162124634},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5669999718666077},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.546500027179718},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.5094000101089478},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.3095000088214874}],"concepts":[{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.928600013256073},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.777899980545044},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6330000162124634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6171000003814697},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5669999718666077},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.546500027179718},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.5094000101089478},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36469998955726624},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3095000088214874},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.30309998989105225},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C2779038628","wikidata":"https://www.wikidata.org/wiki/Q7248497","display_name":"Programming by demonstration","level":3,"score":0.273499995470047},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2619999945163727},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.257999986410141}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246582","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246582","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1892339738","https://openalex.org/W1999156278","https://openalex.org/W2076363786","https://openalex.org/W2089630413","https://openalex.org/W2109163007","https://openalex.org/W2201912979","https://openalex.org/W2564383266","https://openalex.org/W2805051407","https://openalex.org/W2955639361","https://openalex.org/W2962875890","https://openalex.org/W3035198432","https://openalex.org/W3088158297","https://openalex.org/W3099587965","https://openalex.org/W3130885760","https://openalex.org/W3205420310","https://openalex.org/W4221167790","https://openalex.org/W4280533594","https://openalex.org/W4312238277","https://openalex.org/W4319779677","https://openalex.org/W4327545837","https://openalex.org/W4366563389","https://openalex.org/W4383097638","https://openalex.org/W4383108457","https://openalex.org/W4383108836","https://openalex.org/W4386065691","https://openalex.org/W4387092545","https://openalex.org/W4389666115","https://openalex.org/W4389667112","https://openalex.org/W4390871819","https://openalex.org/W4390872747","https://openalex.org/W4390874575","https://openalex.org/W4401413802","https://openalex.org/W4402702930","https://openalex.org/W4402726790","https://openalex.org/W4402753835","https://openalex.org/W4402754165","https://openalex.org/W4402961691","https://openalex.org/W4404612908","https://openalex.org/W4404725708","https://openalex.org/W4405078890","https://openalex.org/W4405785249","https://openalex.org/W4405785344","https://openalex.org/W4405786915","https://openalex.org/W4413925207"],"related_works":[],"abstract_inverted_index":{"Language-driven":[0],"grasp":[1,80],"detection":[2],"has":[3],"the":[4,119,150],"potential":[5],"to":[6,13,35,56,58,85],"revolutionize":[7],"human-robot":[8],"interaction":[9],"by":[10],"allowing":[11],"robots":[12],"understand":[14],"and":[15,89,111,115,121,145],"execute":[16],"grasping":[17],"tasks":[18],"based":[19],"on":[20,126],"natural":[21],"language":[22],"commands.":[23],"However,":[24],"existing":[25,136],"approaches":[26],"face":[27],"two":[28,127],"key":[29],"challenges.":[30],"First,":[31],"they":[32],"often":[33],"struggle":[34],"interpret":[36],"complex":[37,106],"text":[38],"instructions":[39],"or":[40,53],"operate":[41],"ineffectively":[42],"in":[43,64,92,142],"densely":[44],"cluttered":[45],"environments.":[46],"Second,":[47],"most":[48],"methods":[49],"require":[50],"a":[51,73],"training":[52],"fine-tuning":[54],"step":[55],"adapt":[57],"new":[59,74],"domains,":[60],"limiting":[61],"their":[62],"generation":[63],"real-world":[65,93,146],"applications.":[66],"In":[67],"this":[68],"paper,":[69],"we":[70],"introduce":[71],"GraspMAS,":[72],"multi-agent":[75],"system":[76],"framework":[77,96],"for":[78,104],"language-driven":[79],"detection.":[81],"GraspMAS":[82,133],"is":[83,158],"designed":[84],"reason":[86],"through":[87],"ambiguities":[88],"improve":[90],"decision-making":[91],"scenarios.":[94],"Our":[95,155],"consists":[97],"of":[98,152],"three":[99],"specialized":[100],"agents:":[101],"Planner,":[102],"responsible":[103],"strategizing":[105],"queries;":[107],"Coder,":[108],"which":[109,117],"generates":[110],"executes":[112],"source":[113],"code;":[114],"Observer,":[116],"evaluates":[118],"outcomes":[120],"provides":[122],"feedback.":[123],"Intensive":[124],"experiments":[125,140],"large-scale":[128],"datasets":[129],"demonstrate":[130],"that":[131],"our":[132,153],"significantly":[134],"outperforms":[135],"baselines.":[137],"Additionally,":[138],"robot":[139],"conducted":[141],"both":[143],"simulation":[144],"settings":[147],"further":[148],"validate":[149],"effectiveness":[151],"approach.":[154],"project":[156],"page":[157],"available":[159],"at":[160],"https://zquang2202.github.io/GraspMAS.":[161]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-28T00:00:00"}
