{"id":"https://openalex.org/W4405785029","doi":"https://doi.org/10.1109/iros58592.2024.10802219","title":"KOSMOS-E : Learning to Follow Instruction for Robotic Grasping","display_name":"KOSMOS-E : Learning to Follow Instruction for Robotic Grasping","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405785029","doi":"https://doi.org/10.1109/iros58592.2024.10802219"},"language":"en","primary_location":{"id":"doi:10.1109/iros58592.2024.10802219","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10802219","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100376392","display_name":"Zhi Wang","orcid":"https://orcid.org/0000-0002-3252-9254"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhi Wang","raw_affiliation_strings":["Microsoft Research Asia,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia,Beijing,China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108923083","display_name":"Xun Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Wu","raw_affiliation_strings":["Microsoft Research Asia,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia,Beijing,China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061624006","display_name":"Shaohan Huang","orcid":"https://orcid.org/0000-0003-4324-6337"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaohan Huang","raw_affiliation_strings":["Microsoft Research Asia,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia,Beijing,China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100407434","display_name":"Dong Li","orcid":"https://orcid.org/0000-0002-3758-7218"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Dong","raw_affiliation_strings":["Microsoft Research Asia,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia,Beijing,China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370065","display_name":"Wenhui Wang","orcid":"https://orcid.org/0000-0001-5884-7421"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhui Wang","raw_affiliation_strings":["Microsoft Research Asia,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia,Beijing,China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113130010","display_name":"Shuming Ma","orcid":"https://orcid.org/0000-0003-1091-1206"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuming Ma","raw_affiliation_strings":["Microsoft Research Asia,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia,Beijing,China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014662947","display_name":"Furu Wei","orcid":"https://orcid.org/0000-0002-7810-5852"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Furu Wei","raw_affiliation_strings":["Microsoft Research Asia,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia,Beijing,China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100376392"],"corresponding_institution_ids":["https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":0.4289,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.75820783,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"9510","last_page":"9517"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10533","display_name":"Teaching and Learning Programming","score":0.9812999963760376,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10533","display_name":"Teaching and Learning Programming","score":0.9812999963760376,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.9452999830245972,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9024999737739563,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7134894728660583},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5001153945922852},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.42868155241012573}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7134894728660583},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5001153945922852},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.42868155241012573}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros58592.2024.10802219","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10802219","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1606226685","https://openalex.org/W1999156278","https://openalex.org/W2064878003","https://openalex.org/W2076363786","https://openalex.org/W2109163007","https://openalex.org/W2123435073","https://openalex.org/W2953669419","https://openalex.org/W2962737955","https://openalex.org/W3130885760","https://openalex.org/W3198685994","https://openalex.org/W4285613774","https://openalex.org/W4390874575","https://openalex.org/W6740972095","https://openalex.org/W6749696961","https://openalex.org/W6791353385","https://openalex.org/W6809646742","https://openalex.org/W6810630513","https://openalex.org/W6810640255","https://openalex.org/W6838865847","https://openalex.org/W6839632867","https://openalex.org/W6839928859","https://openalex.org/W6846254642","https://openalex.org/W6849120541","https://openalex.org/W6849594959","https://openalex.org/W6850015000","https://openalex.org/W6850300045","https://openalex.org/W6850503672","https://openalex.org/W6851592950","https://openalex.org/W6851999714","https://openalex.org/W6852136651","https://openalex.org/W6852884379","https://openalex.org/W6854222408","https://openalex.org/W6854555012","https://openalex.org/W6854738657","https://openalex.org/W6854866820","https://openalex.org/W6854929498","https://openalex.org/W6855970221","https://openalex.org/W6856535302","https://openalex.org/W6860622411"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Tuning":[0],"on":[1,111],"instruction-following":[2,40,61],"data":[3,43],"has":[4],"been":[5],"shown":[6],"to":[7,44],"enhance":[8,45],"the":[9,17,23,107],"capabilities":[10,46],"and":[11,49,85,87,102],"controllability":[12],"of":[13,79,109,118],"language":[14],"models,":[15],"but":[16],"idea":[18],"is":[19],"less":[20],"explored":[21],"in":[22],"robotic":[24,41,51,62,112],"field.":[25],"In":[26],"this":[27],"work,":[28],"we":[29,57],"introduce":[30],"KOSMOS-E,":[31],"a":[32,59,73,90,94,116],"Multimodal":[33],"Large":[34],"Language":[35],"Model":[36],"(MLLM)":[37],"that":[38],"leverages":[39],"grasping":[42,52,63,113],"for":[47],"precise":[48],"intricate":[50],"maneuvers.":[53],"To":[54],"achieve":[55],"this,":[56],"craft":[58],"large-scale":[60],"dataset,":[64],"termed":[65],"INSTRUCT-GRASP,":[66],"primarily":[67],"comprising":[68],"two":[69],"aspects:":[70],"(i)":[71],"grasp":[72,89],"single":[74],"object":[75,92],"following":[76,97],"varying":[77],"levels":[78],"granularity":[80],"descriptions,":[81],"e.g.,":[82,100],"different":[83],"angles":[84],"aspects,":[86],"(ii)":[88],"specific":[91,98],"within":[93],"multi-object":[95],"environment":[96],"attributes,":[99],"color":[101],"shape.":[103],"Extensive":[104],"experiments":[105],"show":[106],"effectiveness":[108],"KOSMOS-E":[110],"tasks":[114],"across":[115],"variety":[117],"environments.":[119]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
