{"id":"https://openalex.org/W4403510570","doi":"https://doi.org/10.1109/tcsvt.2024.3482439","title":"ZeroPose: CAD-Prompted Zero-Shot Object 6D Pose Estimation in Cluttered Scenes","display_name":"ZeroPose: CAD-Prompted Zero-Shot Object 6D Pose Estimation in Cluttered Scenes","publication_year":2024,"publication_date":"2024-10-17","ids":{"openalex":"https://openalex.org/W4403510570","doi":"https://doi.org/10.1109/tcsvt.2024.3482439"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3482439","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3482439","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101953737","display_name":"Jianqiu Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianqiu Chen","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055330568","display_name":"Zikun Zhou","orcid":"https://orcid.org/0000-0002-2687-7762"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zikun Zhou","raw_affiliation_strings":["Pengcheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100657908","display_name":"Mingshan Sun","orcid":"https://orcid.org/0000-0002-0785-7387"},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingshan Sun","raw_affiliation_strings":["SenseTime Research, Hong Kong, China","SenseTime Research, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Research, Hong Kong, China","institution_ids":["https://openalex.org/I4210128910"]},{"raw_affiliation_string":"SenseTime Research, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024522346","display_name":"Rui Zhao","orcid":"https://orcid.org/0000-0001-5874-131X"},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Zhao","raw_affiliation_strings":["SenseTime Research, Hong Kong, China","SenseTime Research, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Research, Hong Kong, China","institution_ids":["https://openalex.org/I4210128910"]},{"raw_affiliation_string":"SenseTime Research, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100675835","display_name":"Liwei Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liwei Wu","raw_affiliation_strings":["SenseTime Research, Hong Kong, China","SenseTime Research, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Research, Hong Kong, China","institution_ids":["https://openalex.org/I4210128910"]},{"raw_affiliation_string":"SenseTime Research, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025650981","display_name":"Tianpeng Bao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianpeng Bao","raw_affiliation_strings":["SenseTime Research, Hong Kong, China","SenseTime Research, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Research, Hong Kong, China","institution_ids":["https://openalex.org/I4210128910"]},{"raw_affiliation_string":"SenseTime Research, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100740564","display_name":"Zhenyu He","orcid":"https://orcid.org/0000-0002-2546-8721"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenyu He","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101953737"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":2.8782,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.91863389,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"35","issue":"2","first_page":"1251","last_page":"1264"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9810000061988831,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9810000061988831,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12549","display_name":"Image and Object Detection Techniques","score":0.9787999987602234,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.7987111806869507},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7500107884407043},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6335685849189758},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.5013453960418701},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4808814823627472},{"id":"https://openalex.org/keywords/cad","display_name":"CAD","score":0.45993757247924805},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4502897560596466},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.41969186067581177},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.4178805649280548},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3824860155582428},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3349090814590454},{"id":"https://openalex.org/keywords/engineering-drawing","display_name":"Engineering drawing","score":0.08802559971809387},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08561095595359802}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7987111806869507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7500107884407043},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6335685849189758},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.5013453960418701},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4808814823627472},{"id":"https://openalex.org/C194789388","wikidata":"https://www.wikidata.org/wiki/Q17855283","display_name":"CAD","level":2,"score":0.45993757247924805},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4502897560596466},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.41969186067581177},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.4178805649280548},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3824860155582428},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3349090814590454},{"id":"https://openalex.org/C199639397","wikidata":"https://www.wikidata.org/wiki/Q1788588","display_name":"Engineering drawing","level":1,"score":0.08802559971809387},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08561095595359802},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3482439","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3482439","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2049981393","https://openalex.org/W2083624955","https://openalex.org/W2085261163","https://openalex.org/W2161168419","https://openalex.org/W2165290582","https://openalex.org/W2580726517","https://openalex.org/W2739110650","https://openalex.org/W2963150697","https://openalex.org/W2963177347","https://openalex.org/W2963188159","https://openalex.org/W2963225012","https://openalex.org/W2963892972","https://openalex.org/W2989915422","https://openalex.org/W2990613095","https://openalex.org/W3034712732","https://openalex.org/W3092774272","https://openalex.org/W3128632815","https://openalex.org/W3137905681","https://openalex.org/W3176164117","https://openalex.org/W3199947443","https://openalex.org/W3207966761","https://openalex.org/W3216621076","https://openalex.org/W4200604336","https://openalex.org/W4205274304","https://openalex.org/W4221153074","https://openalex.org/W4221167997","https://openalex.org/W4226095747","https://openalex.org/W4226409831","https://openalex.org/W4281557677","https://openalex.org/W4285102264","https://openalex.org/W4285233964","https://openalex.org/W4298014068","https://openalex.org/W4311640782","https://openalex.org/W4312359138","https://openalex.org/W4312594783","https://openalex.org/W4312982499","https://openalex.org/W4313134354","https://openalex.org/W4316660744","https://openalex.org/W4328007191","https://openalex.org/W4382365515","https://openalex.org/W4386071707","https://openalex.org/W4388192390","https://openalex.org/W4388266322","https://openalex.org/W4390190260","https://openalex.org/W4390873315","https://openalex.org/W4390874575","https://openalex.org/W4401385967","https://openalex.org/W4402727562","https://openalex.org/W4402915911","https://openalex.org/W6781542114","https://openalex.org/W6841994211","https://openalex.org/W6845896635","https://openalex.org/W6848478783","https://openalex.org/W6851800889","https://openalex.org/W6853020185","https://openalex.org/W6853219587","https://openalex.org/W6855955516","https://openalex.org/W6859023048","https://openalex.org/W6861232000","https://openalex.org/W6864332406","https://openalex.org/W6873551568"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W4298312966","https://openalex.org/W2325697621"],"abstract_inverted_index":{"Many":[0],"robotics":[1],"and":[2,60,66,124,147,187,210],"industry":[3],"applications":[4,76],"have":[5],"a":[6,45,51,83,92,112,132,179,200],"high":[7],"demand":[8],"for":[9],"the":[10,14,21,36,108,128,135,138,167,170,174,193,212],"capability":[11],"to":[12,44,100,121],"estimate":[13,148],"6D":[15,150],"pose":[16,27,89,176],"of":[17,69,111,137],"novel":[18,46,84,101,113],"objects":[19,38,102],"from":[20,127],"cluttered":[22],"scene.":[23],"However,":[24],"existing":[25,162],"classic":[26],"estimation":[28,90,177],"methods":[29,49,164,209],"are":[30],"object-specific,":[31],"which":[32,55,182],"can":[33,142],"only":[34],"handle":[35],"specific":[37],"seen":[39],"during":[40],"training.":[41],"When":[42],"applied":[43],"object,":[47,114],"these":[48],"necessitate":[50],"cumbersome":[52],"onboarding":[53,70,119],"process,":[54],"involves":[56],"extensive":[57,64],"dataset":[58],"preparation":[59],"model":[61,105,110,130],"retraining.":[62,106],"The":[63],"duration":[65],"resource":[67],"consumption":[68],"limit":[71],"their":[72,149],"practicality":[73],"in":[74,117],"real-world":[75],"In":[77],"this":[78],"paper,":[79],"we":[80],"introduce":[81],"ZeroPose,":[82],"zero-shot":[85,163,201,214],"framework":[86,98],"that":[87,197],"performs":[88],"following":[91],"Discovery-Orientation-Registration":[93],"(DOR)":[94],"inference":[95,218],"pipeline.":[96],"This":[97],"generalizes":[99],"without":[103,152],"requiring":[104],"Given":[107],"CAD":[109,129],"ZeroPose":[115,198],"enables":[116],"seconds":[118],"time":[120],"extract":[122],"visual":[123],"geometric":[125],"embeddings":[126],"as":[131,199],"prompt.":[133],"With":[134],"prompting":[136],"above":[139],"embeddings,":[140],"DOR":[141,171],"discover":[143],"all":[144],"related":[145],"instances":[146],"poses":[151],"additional":[153],"human":[154],"interaction":[155],"or":[156],"presupposing":[157],"scene":[158],"conditions.":[159],"Compared":[160],"with":[161,206,216],"solved":[165],"by":[166],"render-and-compare":[168],"paradigm,":[169],"pipeline":[172],"formulates":[173],"object":[175],"into":[178],"feature-matching":[180],"problem,":[181],"avoids":[183],"time-consuming":[184],"online":[185],"rendering":[186],"improves":[188],"efficiency.":[189],"Experimental":[190],"results":[191],"on":[192],"seven":[194],"datasets":[195],"show":[196],"method":[202,215],"achieves":[203],"comparable":[204],"performance":[205],"object-specific":[207],"training":[208],"outperforms":[211],"state-of-the-art":[213],"50x":[217],"speed":[219],"improvement.":[220]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
