{"id":"https://openalex.org/W4389666592","doi":"https://doi.org/10.1109/iros55552.2023.10342215","title":"Depth-Based 6DoF Object Pose Estimation Using Swin Transformer","display_name":"Depth-Based 6DoF Object Pose Estimation Using Swin Transformer","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4389666592","doi":"https://doi.org/10.1109/iros55552.2023.10342215"},"language":"en","primary_location":{"id":"doi:10.1109/iros55552.2023.10342215","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342215","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101760791","display_name":"Zhujun Li","orcid":"https://orcid.org/0000-0001-9886-7662"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]},{"id":"https://openalex.org/I121847817","display_name":"The Graduate Center, CUNY","ror":"https://ror.org/00awd9g61","country_code":"US","type":"education","lineage":["https://openalex.org/I121847817"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhujun Li","raw_affiliation_strings":["City University of New York,The Graduate Center","The Graduate Center, City University of New York"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"City University of New York,The Graduate Center","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]},{"raw_affiliation_string":"The Graduate Center, City University of New York","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079727022","display_name":"Ioannis Stamos","orcid":null},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]},{"id":"https://openalex.org/I121847817","display_name":"The Graduate Center, CUNY","ror":"https://ror.org/00awd9g61","country_code":"US","type":"education","lineage":["https://openalex.org/I121847817"]},{"id":"https://openalex.org/I39694355","display_name":"Hunter College","ror":"https://ror.org/00g2xk477","country_code":"US","type":"education","lineage":["https://openalex.org/I39694355"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ioannis Stamos","raw_affiliation_strings":["City University of New York,The Graduate Center","Hunter College, City University of New York","The Graduate Center, City University of New York"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"City University of New York,The Graduate Center","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]},{"raw_affiliation_string":"Hunter College, City University of New York","institution_ids":["https://openalex.org/I39694355","https://openalex.org/I174216632"]},{"raw_affiliation_string":"The Graduate Center, City University of New York","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101760791"],"corresponding_institution_ids":["https://openalex.org/I121847817","https://openalex.org/I174216632"],"apc_list":null,"apc_paid":null,"fwci":4.0753,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.94550181,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1185","last_page":"1191"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.8167930245399475},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.8048814535140991},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.788953423500061},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.762698769569397},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.634297788143158},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.600044846534729},{"id":"https://openalex.org/keywords/augmented-reality","display_name":"Augmented reality","score":0.577452540397644},{"id":"https://openalex.org/keywords/3d-pose-estimation","display_name":"3D pose estimation","score":0.520578920841217},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.49732211232185364},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4539470672607422},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.43046531081199646},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14983105659484863}],"concepts":[{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.8167930245399475},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.8048814535140991},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.788953423500061},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.762698769569397},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.634297788143158},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.600044846534729},{"id":"https://openalex.org/C153715457","wikidata":"https://www.wikidata.org/wiki/Q254183","display_name":"Augmented reality","level":2,"score":0.577452540397644},{"id":"https://openalex.org/C36613465","wikidata":"https://www.wikidata.org/wiki/Q4636322","display_name":"3D pose estimation","level":3,"score":0.520578920841217},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.49732211232185364},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4539470672607422},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.43046531081199646},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14983105659484863},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros55552.2023.10342215","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342215","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","score":0.5,"display_name":"Climate action"}],"awards":[{"id":"https://openalex.org/G3570715612","display_name":null,"funder_award_id":"CNS1625843","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W132147841","https://openalex.org/W1969868017","https://openalex.org/W1988874269","https://openalex.org/W2058761328","https://openalex.org/W2059630294","https://openalex.org/W2067191022","https://openalex.org/W2074142320","https://openalex.org/W2146356699","https://openalex.org/W2520352517","https://openalex.org/W2561343020","https://openalex.org/W2566131234","https://openalex.org/W2884822772","https://openalex.org/W2963150697","https://openalex.org/W2963177347","https://openalex.org/W2963188159","https://openalex.org/W2963351448","https://openalex.org/W2963756608","https://openalex.org/W3008195728","https://openalex.org/W3009516594","https://openalex.org/W3012494314","https://openalex.org/W3034268164","https://openalex.org/W3034986117","https://openalex.org/W3091481715","https://openalex.org/W3118791395","https://openalex.org/W3131948714","https://openalex.org/W3138516171","https://openalex.org/W3174822420","https://openalex.org/W3179923621","https://openalex.org/W3205341492","https://openalex.org/W4226017686","https://openalex.org/W4234552385","https://openalex.org/W4283732315","https://openalex.org/W4307415270","https://openalex.org/W4312339117","https://openalex.org/W4312877043","https://openalex.org/W4312982499","https://openalex.org/W6763422710","https://openalex.org/W6810696484","https://openalex.org/W6846703903"],"related_works":["https://openalex.org/W4320086129","https://openalex.org/W4253893311","https://openalex.org/W2798721181","https://openalex.org/W3201205132","https://openalex.org/W4287600488","https://openalex.org/W4312694060","https://openalex.org/W4281696776","https://openalex.org/W4318148659","https://openalex.org/W4387967917","https://openalex.org/W4299867837"],"abstract_inverted_index":{"Accurately":[0],"estimating":[1],"the":[2,58,72,111,123,128,153,192,204,228,236],"6D":[3,105,183,215],"pose":[4,75,106,217],"of":[5,61,74,238],"objects":[6],"is":[7,142,253],"crucial":[8],"for":[9,214,245],"many":[10],"applications,":[11],"such":[12],"as":[13],"robotic":[14],"grasping,":[15],"autonomous":[16],"driving,":[17],"and":[18,57,80,122,161,167,174,198,206,241],"augmented":[19],"reality.":[20],"However,":[21,65],"this":[22,39,85],"task":[23],"becomes":[24],"more":[25],"challenging":[26],"in":[27,118,127,248],"poor":[28],"lighting":[29],"conditions":[30],"or":[31],"when":[32],"dealing":[33],"with":[34],"textureless":[35],"objects.":[36],"To":[37,83],"address":[38],"issue,":[40],"depth":[41,68,100,120,220],"images":[42,101],"are":[43,135,165],"becoming":[44],"an":[45,139],"increasingly":[46],"popular":[47],"choice":[48],"due":[49],"to":[50,53,70,102,151],"their":[51],"invariance":[52],"a":[54,78,89,119,170,175,186],"scene's":[55],"appearance":[56],"implicit":[59],"incorporation":[60],"essential":[62],"geometric":[63,97],"characteristics.":[64],"fully":[66],"leveraging":[67],"information":[69,98],"improve":[71],"performance":[73,247],"estimation":[76,218],"remains":[77],"difficult":[79],"under-investigated":[81],"problem.":[82],"tackle":[84],"challenge,":[86],"we":[87,148,181],"propose":[88],"novel":[90],"framework":[91],"called":[92],"SwinDePose,":[93],"that":[94],"uses":[95],"only":[96],"from":[99,155],"achieve":[103],"accurate":[104],"estimation.":[107],"SwinDePose":[108,209],"first":[109],"calculates":[110],"angles":[112,134],"between":[113],"each":[114],"normal":[115],"vector":[116],"defined":[117],"image":[121,160],"three":[124],"coordinate":[125,130],"axes":[126],"camera":[129],"system.":[131],"The":[132,158],"resulting":[133,159],"then":[136],"formed":[137],"into":[138,169],"image,":[140],"which":[141],"encoded":[143],"using":[144,185,219],"Swin":[145],"Transformer.":[146],"Additionally,":[147],"apply":[149],"RandLA-Net":[150],"learn":[152],"representations":[154],"point":[156,162],"clouds.":[157],"clouds":[163],"embeddings":[164],"concatenated":[166],"fed":[168],"semantic":[171,196],"segmentation":[172],"module":[173],"3D":[176,199],"keypoints":[177],"localization":[178],"module.":[179],"Finally,":[180],"estimate":[182],"poses":[184],"least-square":[187],"fitting":[188],"approach":[189,240],"based":[190],"on":[191,203,227],"target":[193],"object's":[194],"predicted":[195],"mask":[197],"keypoints.":[200],"In":[201],"experiments":[202],"LineMod":[205],"Occlusion":[207],"LineMod,":[208],"outperforms":[210],"existing":[211],"state-of-the-art":[212],"methods":[213],"object":[216],"images.":[221],"We":[222],"also":[223],"provide":[224],"competitive":[225],"results":[226],"YCB-Video":[229],"dataset":[230],"even":[231],"without":[232],"post-processing.":[233],"This":[234],"demonstrates":[235],"effectiveness":[237],"our":[239],"highlights":[242],"its":[243],"potential":[244],"improving":[246],"real-world":[249],"scenarios.":[250],"Our":[251],"code":[252],"at":[254],"https://github.com/zhujunli1993/SwinDePose.":[255]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
