{"id":"https://openalex.org/W4413918264","doi":"https://doi.org/10.1109/icra55743.2025.11128151","title":"DA-Fusion: Deformable Attention-Based RGB-D Fusion Transformer for Unseen Object Instance Segmentation","display_name":"DA-Fusion: Deformable Attention-Based RGB-D Fusion Transformer for Unseen Object Instance Segmentation","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W4413918264","doi":"https://doi.org/10.1109/icra55743.2025.11128151"},"language":"en","primary_location":{"id":"doi:10.1109/icra55743.2025.11128151","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128151","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yesol Park","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yesol Park","raw_affiliation_strings":["Seoul National University,Interdisciplinary Program in AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Interdisciplinary Program in AI","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009471318","display_name":"Hye\u2010Jung Yoon","orcid":"https://orcid.org/0000-0002-3534-3295"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hye-Jung Yoon","raw_affiliation_strings":["Seoul National University,Interdisciplinary Program in AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Interdisciplinary Program in AI","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024866431","display_name":"Juno Kim","orcid":"https://orcid.org/0000-0003-1300-9875"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Juno Kim","raw_affiliation_strings":["Seoul National University,Interdisciplinary Program in AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Interdisciplinary Program in AI","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110207888","display_name":"Byoung-Tak Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Byoung-Tak Zhang","raw_affiliation_strings":["Seoul National University,Interdisciplinary Program in AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Interdisciplinary Program in AI","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.809,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.87417219,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"7490","last_page":"7496"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.9729999899864197,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7014445066452026},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.6682205200195312},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6480238437652588},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6301416158676147},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5965490341186523},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.4363977909088135}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7014445066452026},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.6682205200195312},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6480238437652588},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6301416158676147},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5965490341186523},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.4363977909088135},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra55743.2025.11128151","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128151","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2094790148","display_name":null,"funder_award_id":"RS-2024-00353991-SPARC,RS-2023-00274280-HEI","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G375614148","display_name":null,"funder_award_id":"RS-2021-II212068AIHub,RS-2021-II211343-GSAI,RS-2022-II220951-LBA,RS-2022-II220953-PICA","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G4903766688","display_name":null,"funder_award_id":"RS-2024-00423940/10","funder_id":"https://openalex.org/F4320334879","funder_display_name":"Korea Evaluation Institute of Industrial Technology"}],"funders":[{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320334879","display_name":"Korea Evaluation Institute of Industrial Technology","ror":"https://ror.org/03z9cwa38"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1565402342","https://openalex.org/W1972630525","https://openalex.org/W2307599347","https://openalex.org/W2603737562","https://openalex.org/W2890593621","https://openalex.org/W2962914239","https://openalex.org/W2963150697","https://openalex.org/W2963896186","https://openalex.org/W2967153639","https://openalex.org/W3010048653","https://openalex.org/W3033020497","https://openalex.org/W3112503277","https://openalex.org/W3137905681","https://openalex.org/W3138516171","https://openalex.org/W3198139586","https://openalex.org/W3200139538","https://openalex.org/W4210922806","https://openalex.org/W4223431326","https://openalex.org/W4312815172","https://openalex.org/W4312876407","https://openalex.org/W4313068473","https://openalex.org/W4378189111","https://openalex.org/W4386179772","https://openalex.org/W4389667009","https://openalex.org/W4390874575"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"In":[0],"logistics":[1,137],"automation,":[2],"precise":[3],"segmentation":[4,91],"of":[5,84],"unseen":[6,75],"objects":[7,42],"is":[8],"crucial":[9],"for":[10,74,113,135],"efficient":[11],"robotic":[12],"manipulation":[13],"in":[14,93,117],"cluttered":[15,94],"environments.":[16,98],"Tasks":[17],"such":[18],"as":[19],"bin-picking":[20,115],"and":[21,32,87,95],"shelfpicking":[22],"require":[23],"robust":[24],"perception":[25],"to":[26,40,44],"handle":[27],"occlusions,":[28],"varying":[29],"object":[30,76,97],"shapes,":[31],"complex":[33],"spatial":[34],"arrangements.":[35],"Traditional":[36],"RGB-based":[37],"methods":[38,51,127],"tend":[39],"over-segment":[41],"due":[43],"their":[45],"reliance":[46],"on":[47,57],"texture,":[48],"while":[49],"depth-based":[50],"often":[52],"under-segment":[53],"by":[54],"focusing":[55],"primarily":[56],"geometric":[58],"features.":[59],"To":[60],"address":[61],"these":[62],"limitations,":[63],"we":[64],"propose":[65],"DA-Fusion,":[66],"a":[67,108],"deformable":[68],"attention-based":[69],"RGB-D":[70],"fusion":[71],"Transformer":[72],"designed":[73],"instance":[77],"segmentation.":[78],"DA-Fusion":[79,124],"effectively":[80],"combines":[81],"the":[82,102],"strengths":[83],"both":[85],"RGB":[86],"depth":[88],"data,":[89],"enhancing":[90],"accuracy":[92],"multi-layered":[96],"We":[99],"also":[100],"introduce":[101],"Object":[103],"Clutter":[104],"Bin":[105],"Dataset":[106],"(OCBD),":[107],"benchmark":[109],"dataset":[110],"specifically":[111],"tailored":[112],"evaluating":[114],"scenarios":[116],"top-down":[118],"views.":[119],"Extensive":[120],"evaluations":[121],"demonstrate":[122],"that":[123],"outperforms":[125],"state-of-the-art":[126],"across":[128],"diverse":[129],"environments,":[130],"making":[131],"it":[132],"particularly":[133],"suited":[134],"real-world":[136],"tasks.":[138]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
