{"id":"https://openalex.org/W4401417458","doi":"https://doi.org/10.1109/icra57147.2024.10611464","title":"ZS6D: Zero-shot 6D Object Pose Estimation using Vision Transformers","display_name":"ZS6D: Zero-shot 6D Object Pose Estimation using Vision Transformers","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401417458","doi":"https://doi.org/10.1109/icra57147.2024.10611464"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10611464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001680599","display_name":"Philipp Ausserlechner","orcid":null},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Philipp Ausserlechner","raw_affiliation_strings":["Automation and Control Institute,Vision for Robotics Laboratory,TU Wien,Austria"],"affiliations":[{"raw_affiliation_string":"Automation and Control Institute,Vision for Robotics Laboratory,TU Wien,Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092926029","display_name":"David Haberger","orcid":null},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"David Haberger","raw_affiliation_strings":["Automation and Control Institute,Vision for Robotics Laboratory,TU Wien,Austria"],"affiliations":[{"raw_affiliation_string":"Automation and Control Institute,Vision for Robotics Laboratory,TU Wien,Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074166804","display_name":"Stefan Thalhammer","orcid":"https://orcid.org/0000-0002-0008-430X"},"institutions":[{"id":"https://openalex.org/I121760703","display_name":"University of Applied Sciences Technikum Wien","ror":"https://ror.org/04jsx0x49","country_code":"AT","type":"education","lineage":["https://openalex.org/I121760703"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Stefan Thalhammer","raw_affiliation_strings":["University of Applied Sciences Technikum Vienna,Industrial Engineering Department,Austria"],"affiliations":[{"raw_affiliation_string":"University of Applied Sciences Technikum Vienna,Industrial Engineering Department,Austria","institution_ids":["https://openalex.org/I121760703"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038121024","display_name":"Jean-Baptiste Weibel","orcid":"https://orcid.org/0000-0003-0201-4740"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Jean-Baptiste Weibel","raw_affiliation_strings":["Automation and Control Institute,Vision for Robotics Laboratory,TU Wien,Austria"],"affiliations":[{"raw_affiliation_string":"Automation and Control Institute,Vision for Robotics Laboratory,TU Wien,Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013565399","display_name":"Markus Vincze","orcid":"https://orcid.org/0000-0002-2799-491X"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Markus Vincze","raw_affiliation_strings":["Automation and Control Institute,Vision for Robotics Laboratory,TU Wien,Austria"],"affiliations":[{"raw_affiliation_string":"Automation and Control Institute,Vision for Robotics Laboratory,TU Wien,Austria","institution_ids":["https://openalex.org/I145847075"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5001680599"],"corresponding_institution_ids":["https://openalex.org/I145847075"],"apc_list":null,"apc_paid":null,"fwci":10.7289,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.9896034,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"463","last_page":"469"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12549","display_name":"Image and Object Detection Techniques","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.7507794499397278},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6814889907836914},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.6409718990325928},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5851752161979675},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.561006486415863},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.4390642046928406},{"id":"https://openalex.org/keywords/cognitive-neuroscience-of-visual-object-recognition","display_name":"Cognitive neuroscience of visual object recognition","score":0.42251744866371155},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.3426108658313751},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14919951558113098},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.08084315061569214},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.05745628476142883}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7507794499397278},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6814889907836914},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.6409718990325928},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5851752161979675},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.561006486415863},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.4390642046928406},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.42251744866371155},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.3426108658313751},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14919951558113098},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08084315061569214},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.05745628476142883},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10611464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Climate action","score":0.41999998688697815,"id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W132147841","https://openalex.org/W2036273537","https://openalex.org/W2063463366","https://openalex.org/W2085261163","https://openalex.org/W2117539524","https://openalex.org/W2161168419","https://openalex.org/W2196978909","https://openalex.org/W2580726517","https://openalex.org/W2962783853","https://openalex.org/W2963150697","https://openalex.org/W2963188159","https://openalex.org/W2981378444","https://openalex.org/W3009516594","https://openalex.org/W3034268164","https://openalex.org/W3034573608","https://openalex.org/W3092774272","https://openalex.org/W3094502228","https://openalex.org/W3159481202","https://openalex.org/W3177069133","https://openalex.org/W3199814225","https://openalex.org/W3199947443","https://openalex.org/W3205232447","https://openalex.org/W3207257716","https://openalex.org/W4221167997","https://openalex.org/W4298014068","https://openalex.org/W4298395628","https://openalex.org/W4311640782","https://openalex.org/W4312359138","https://openalex.org/W4312445439","https://openalex.org/W4319301140","https://openalex.org/W4321319299","https://openalex.org/W4366208220","https://openalex.org/W4383097421","https://openalex.org/W4385245566","https://openalex.org/W4386630189","https://openalex.org/W4390190260","https://openalex.org/W4390874575","https://openalex.org/W4402916825","https://openalex.org/W6754227600","https://openalex.org/W6763367864","https://openalex.org/W6784333009","https://openalex.org/W6798369812","https://openalex.org/W6811470611","https://openalex.org/W6847324821","https://openalex.org/W6851800889","https://openalex.org/W6852616512"],"related_works":["https://openalex.org/W2123263858","https://openalex.org/W3127959533","https://openalex.org/W4387967917","https://openalex.org/W2114275278","https://openalex.org/W1489511283","https://openalex.org/W2769899322","https://openalex.org/W2974914859","https://openalex.org/W2026565050","https://openalex.org/W2110244802","https://openalex.org/W949345935"],"abstract_inverted_index":{"As":[0],"robotic":[1],"systems":[2],"increasingly":[3],"encounter":[4],"complex":[5],"and":[6,29,65,123,135,187,202],"unconstrained":[7],"real-world":[8],"scenarios,":[9],"there":[10],"is":[11,71,213],"a":[12,74,163],"demand":[13],"to":[14,34,85,161,191,204],"recognize":[15],"diverse":[16],"objects.":[17,36],"The":[18,211],"state-of-the-art":[19,168],"6D":[20,101,142,171],"object":[21,39,100,170],"pose":[22,40,58,102,143,172],"estimation":[23,41,59,173],"methods":[24,42],"rely":[25],"on":[26,73,184,198,208],"object-specific":[27],"training":[28,66],"therefore":[30],"do":[31],"not":[32],"generalize":[33],"unseen":[35],"Recent":[37],"novel":[38,99,169],"are":[43,112,136,159,182],"solving":[44],"this":[45,92],"issue":[46],"using":[47,107],"task-specific":[48,179],"fine-tuned":[49],"CNNs":[50],"for":[51,57,69,97,114,124,138,178],"deep":[52],"template":[53],"matching.":[54],"This":[55,148],"adaptation":[56],"still":[60],"requires":[61],"expensive":[62],"data":[63],"rendering":[64],"procedures.":[67],"MegaPose":[68],"example":[70],"trained":[72],"dataset":[75],"consisting":[76],"of":[77,121],"two":[78,167,209],"million":[79],"images":[80,120],"showing":[81],"20,000":[82],"different":[83],"objects":[84,122],"reach":[86],"such":[87],"generalization":[88],"capabilities.":[89],"To":[90],"overcome":[91],"shortcoming":[93],"we":[94,193,206],"introduce":[95],"ZS6D,":[96],"zero-shot":[98],"estimation.":[103],"Visual":[104],"descriptors,":[105],"extracted":[106,155],"pre-trained":[108,157],"Vision":[109],"Transformers":[110],"(ViT),":[111],"used":[113,137],"matching":[115],"rendered":[116],"templates":[117],"against":[118],"query":[119],"establishing":[125],"local":[126,129],"correspondences.":[127],"These":[128],"correspondences":[130,134],"enable":[131],"deriving":[132],"geometric":[133],"estimating":[139],"the":[140,152,176,195],"object's":[141],"with":[144],"RANSAC-":[145],"based":[146],"PnP.":[147],"approach":[149],"showcases":[150],"that":[151],"image":[153],"descriptors":[154],"by":[156],"ViTs":[158],"well-suited":[160],"achieve":[162],"notable":[164],"improvement":[165],"over":[166],"methods,":[174],"without":[175],"need":[177],"fine-tuning.":[180],"Experiments":[181],"performed":[183],"LMO,":[185],"YCBV,":[186],"TLESS.":[188],"In":[189],"comparison":[190],"MegaPose,":[192],"improve":[194,207],"Average":[196],"Recall":[197],"all":[199],"three":[200],"datasets":[201],"compared":[203],"OSOP":[205],"datasets.":[210],"code":[212],"available":[214],"at":[215],"https://github.com/PhilippAuss/ZS6D.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":9}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
