{"id":"https://openalex.org/W7116715517","doi":"https://doi.org/10.1109/tase.2025.3647137","title":"SDAD: Structured Semantic Disentanglement and Attention Diffusion for Open-Vocabulary Grasping","display_name":"SDAD: Structured Semantic Disentanglement and Attention Diffusion for Open-Vocabulary Grasping","publication_year":2025,"publication_date":"2025-12-22","ids":{"openalex":"https://openalex.org/W7116715517","doi":"https://doi.org/10.1109/tase.2025.3647137"},"language":null,"primary_location":{"id":"doi:10.1109/tase.2025.3647137","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2025.3647137","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kai Sun","orcid":"https://orcid.org/0009-0000-4471-0555"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kai Sun","raw_affiliation_strings":["School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120983448","display_name":"Jin Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Liu","raw_affiliation_strings":["School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120968304","display_name":"Yixuan Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yixuan Zhou","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yunfeng Kang","orcid":"https://orcid.org/0009-0000-7350-5630"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunfeng Kang","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112365476","display_name":"Yanzi Miao","orcid":null},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanzi Miao","raw_affiliation_strings":["School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"last","author":{"id":null,"display_name":"Hesheng Wang","orcid":"https://orcid.org/0000-0002-9959-1634"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hesheng Wang","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I25757504"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.60783408,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"23","issue":null,"first_page":"1507","last_page":"1518"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9587000012397766,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9587000012397766,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10982","display_name":"Motor Control and Adaptation","score":0.01269999984651804,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.005799999926239252,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.7657999992370605},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6535999774932861},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5465999841690063},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5350000262260437},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5210000276565552},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5148000121116638},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5112000107765198},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.48809999227523804},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.486299991607666}],"concepts":[{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.7657999992370605},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.758400022983551},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6535999774932861},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.649399995803833},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5465999841690063},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5350000262260437},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5210000276565552},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5148000121116638},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5112000107765198},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.48809999227523804},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.486299991607666},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3741999864578247},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.3540000021457672},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.3441999852657318},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.3422999978065491},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C131584629","wikidata":"https://www.wikidata.org/wiki/Q4308705","display_name":"Coupling (piping)","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.30630001425743103},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.29989999532699585},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2992999851703644},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.2522999942302704},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tase.2025.3647137","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2025.3647137","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.5295326709747314,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G1545384812","display_name":null,"funder_award_id":"62225309","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2632104900","display_name":null,"funder_award_id":"2024YFB4708900","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4404341476","display_name":null,"funder_award_id":"2024YFB4708900","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4785738499","display_name":null,"funder_award_id":"62073222","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5163237521","display_name":null,"funder_award_id":"U21A20480","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G665361890","display_name":null,"funder_award_id":"2025M781675","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G6990729683","display_name":null,"funder_award_id":"62361166632","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8857730616","display_name":null,"funder_award_id":"62473370","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1999102822","https://openalex.org/W3159619744","https://openalex.org/W3173859428","https://openalex.org/W3176709420","https://openalex.org/W4240384253","https://openalex.org/W4310557340","https://openalex.org/W4313026212","https://openalex.org/W4385430679","https://openalex.org/W4389667112","https://openalex.org/W4392825466","https://openalex.org/W4401210380","https://openalex.org/W4402716047","https://openalex.org/W4402727167","https://openalex.org/W4402727200","https://openalex.org/W4403390316","https://openalex.org/W4404128478","https://openalex.org/W4404612908","https://openalex.org/W4405785307","https://openalex.org/W4407637743","https://openalex.org/W4408323433","https://openalex.org/W4408914211","https://openalex.org/W4411867031","https://openalex.org/W4413144929","https://openalex.org/W4413145785","https://openalex.org/W4413886989","https://openalex.org/W4415100685"],"related_works":[],"abstract_inverted_index":{"Open-vocabulary":[0],"grasping":[1,204],"is":[2,127],"essential":[3],"for":[4,105],"enabling":[5,162],"robots":[6],"to":[7,55,59,74,110,129,164,179],"operate":[8],"robustly":[9],"in":[10,36,41,68,115,202,207],"open":[11],"and":[12,33,57,134,183,192,223,235],"dynamic":[13],"real-world":[14,208],"environments.":[15],"Current":[16],"methods":[17,66],"typically":[18],"focus":[19],"on":[20,220,225],"predicting":[21],"grasp":[22,215],"poses":[23],"from":[24,167],"fused":[25],"cross-modal":[26,69,116],"features.":[27,137],"The":[28,173],"coupling":[29],"of":[30,185,218],"visual":[31],"features":[32,133,175],"textual":[34],"semantics":[35,132,136],"the":[37,52,72,83,112,124,171,181,186,210,229],"feature":[38,125],"space":[39,126],"results":[40],"scene-level":[42],"representations.":[43],"However,":[44],"such":[45],"representations":[46],"lack":[47,71],"object-level":[48],"discrimination,":[49],"thereby":[50],"limiting":[51],"model\u2019s":[53,84],"ability":[54],"recognize":[56],"generalize":[58],"novel":[60,226],"targets.":[61],"Moreover,":[62,206],"existing":[63,200],"positional":[64],"encoding":[65],"employed":[67],"modeling":[70],"capacity":[73],"effectively":[75],"capture":[76],"spatial":[77,85,141],"relationships":[78],"between":[79],"regions,":[80],"which":[81,154],"limits":[82],"reasoning":[86],"performance.":[87],"To":[88,138],"tackle":[89],"these":[90],"challenges,":[91],"we":[92,144],"propose":[93],"a":[94,118,214],"method":[95,198,231],"that":[96,122,196],"combines":[97],"Structured":[98],"Semantic":[99],"Disentanglement":[100,120],"with":[101],"Attention":[102],"Diffusion":[103],"(SDAD)":[104],"open-vocabulary":[106,203],"robotic":[107],"grasping.":[108],"Specifically,":[109],"improve":[111,140],"semantic":[113],"disentanglement":[114],"features,":[117],"language-guided":[119],"strategy":[121],"regularizes":[123],"proposed":[128,211],"disentangle":[130],"target":[131,174],"conditional":[135],"further":[139],"context":[142],"modeling,":[143],"introduce":[145],"an":[146],"attention":[147,163],"diffusion":[148,157],"mechanism":[149],"inspired":[150],"by":[151,159,233],"Fick\u2019s":[152],"law,":[153],"describes":[155],"natural":[156],"driven":[158],"concentration":[160],"gradients,":[161],"propagate":[165],"smoothly":[166],"condition-feature-anchored":[168],"regions":[169],"across":[170],"scene.":[172],"are":[176],"subsequently":[177],"used":[178],"complete":[180],"matching":[182],"localization":[184],"corresponding":[187],"object":[188],"regions.":[189],"Extensive":[190],"quantitative":[191],"qualitative":[193],"experiments":[194],"demonstrate":[195],"our":[197],"outperforms":[199],"approaches":[201],"tasks.":[205],"scenarios,":[209],"model":[212],"achieves":[213],"success":[216],"rate":[217],"84%":[219],"base":[221],"categories":[222],"66%":[224],"categories,":[227],"outperforming":[228],"state-of-the-art":[230],"GLIPv2":[232],"7%":[234],"5%,":[236],"respectively.":[237]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-12-22T00:00:00"}
