{"id":"https://openalex.org/W4411142842","doi":"https://doi.org/10.1109/tcsvt.2025.3577617","title":"Zero6DOT: Zero-Shot 6D Object Pose Tracking With Monocular RGB Video","display_name":"Zero6DOT: Zero-Shot 6D Object Pose Tracking With Monocular RGB Video","publication_year":2025,"publication_date":"2025-06-09","ids":{"openalex":"https://openalex.org/W4411142842","doi":"https://doi.org/10.1109/tcsvt.2025.3577617"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3577617","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3577617","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Bo Pang","orcid":"https://orcid.org/0009-0005-8297-9850"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bo Pang","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103248507","display_name":"Deming Zhai","orcid":"https://orcid.org/0000-0003-0874-2175"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Deming Zhai","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026278492","display_name":"Jianan Zhen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianan Zhen","raw_affiliation_strings":["SenseTime Ltd., Hangzhou, China","SenseTime Ltd, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Ltd., Hangzhou, China","institution_ids":["https://openalex.org/I4210128910"]},{"raw_affiliation_string":"SenseTime Ltd, Hangzhou, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Long Wang","orcid":"https://orcid.org/0009-0008-8211-5448"},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Wang","raw_affiliation_strings":["SenseTime Ltd., Hangzhou, China","SenseTime Ltd, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Ltd., Hangzhou, China","institution_ids":["https://openalex.org/I4210128910"]},{"raw_affiliation_string":"SenseTime Ltd, Hangzhou, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101556951","display_name":"Xu Han","orcid":"https://orcid.org/0000-0001-6116-3258"},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Han","raw_affiliation_strings":["SenseTime Ltd., Hangzhou, China","SenseTime Ltd, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Ltd., Hangzhou, China","institution_ids":["https://openalex.org/I4210128910"]},{"raw_affiliation_string":"SenseTime Ltd, Hangzhou, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693448","display_name":"Guofeng Zhang","orcid":"https://orcid.org/0000-0001-5661-8430"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guofeng Zhang","raw_affiliation_strings":["State Key Laboratory of CAD and CG, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CAD and CG, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100654390","display_name":"Xianming Liu","orcid":"https://orcid.org/0000-0002-8857-1785"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianming Liu","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":1.2181,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.79775749,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"35","issue":"12","first_page":"12382","last_page":"12395"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9641000032424927,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.8067870736122131},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7673643827438354},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6835250854492188},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.6717145442962646},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.555637001991272},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.5160786509513855},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.45562151074409485},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.4409995675086975},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.41334694623947144},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.40492677688598633}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.8067870736122131},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7673643827438354},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6835250854492188},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.6717145442962646},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.555637001991272},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5160786509513855},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.45562151074409485},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.4409995675086975},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.41334694623947144},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.40492677688598633},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3577617","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3577617","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2268537015","display_name":null,"funder_award_id":"92270116","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1505952289","https://openalex.org/W2050938785","https://openalex.org/W2117228865","https://openalex.org/W2151103935","https://openalex.org/W2194775991","https://openalex.org/W2344474200","https://openalex.org/W2904008996","https://openalex.org/W2962705366","https://openalex.org/W2962783853","https://openalex.org/W2963756608","https://openalex.org/W2963892972","https://openalex.org/W3034275286","https://openalex.org/W3043075211","https://openalex.org/W3109908659","https://openalex.org/W3115191265","https://openalex.org/W3133557228","https://openalex.org/W3166285241","https://openalex.org/W3190988267","https://openalex.org/W3210687399","https://openalex.org/W4226409831","https://openalex.org/W4281557677","https://openalex.org/W4281572148","https://openalex.org/W4306701944","https://openalex.org/W4312326540","https://openalex.org/W4312396403","https://openalex.org/W4312456564","https://openalex.org/W4313134354","https://openalex.org/W4318953210","https://openalex.org/W4364322067","https://openalex.org/W4385245566","https://openalex.org/W4386066287","https://openalex.org/W4386113267","https://openalex.org/W4387568706","https://openalex.org/W4390872919","https://openalex.org/W4390872950","https://openalex.org/W4390874121","https://openalex.org/W4393159097","https://openalex.org/W4396910007","https://openalex.org/W4400062087","https://openalex.org/W4400975093","https://openalex.org/W4401607695"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4285271403","https://openalex.org/W2542007731","https://openalex.org/W2968379562","https://openalex.org/W2091015105","https://openalex.org/W4388689193","https://openalex.org/W2110899030","https://openalex.org/W29633852","https://openalex.org/W2985362983","https://openalex.org/W4327670844"],"abstract_inverted_index":{"6D":[0,45,75,205],"object":[1,46,76,166,206],"tracking":[2,44,72,153,208],"plays":[3],"an":[4,65],"important":[5],"role":[6],"in":[7,40,52,78,106],"various":[8],"applications,":[9],"including":[10],"robotic":[11],"manipulation":[12],"and":[13,32,67,137,158,169,184,191],"virtual":[14],"reality.":[15],"While":[16],"current":[17],"methodologies":[18],"have":[19],"achieved":[20],"significant":[21],"advancements":[22],"through":[23,181],"the":[24,89,92,96,188,197],"use":[25],"of":[26,73,91,103,174,199],"CAD":[27],"models,":[28],"multi-modal":[29],"sensor":[30],"data,":[31],"category-level":[33],"assumptions,":[34],"such":[35,53,164],"resources":[36],"are":[37,119],"often":[38],"inaccessible":[39],"open-world":[41],"scenarios.":[42,211],"Consequently,":[43],"poses":[47,77,118],"using":[48],"only":[49,88],"RGB":[50,80],"data":[51],"scenarios":[54],"remains":[55],"a":[56,126,139],"challenging":[57,162],"task.":[58],"In":[59],"this":[60],"paper,":[61],"we":[62,124],"introduce":[63],"Zero6DOT,":[64],"innovative":[66],"efficient":[68],"method":[69],"for":[70,98,209],"real-time":[71],"unknown":[74],"monocular":[79],"video":[81],"sequences":[82],"at":[83],"8Hz.":[84],"Our":[85],"approach":[86,176],"requires":[87],"mask":[90],"initial":[93,132],"frame,":[94],"eliminating":[95],"need":[97],"additional":[99],"data.":[100],"The":[101,172,194],"core":[102],"Zero6DOT":[104,202],"lies":[105],"its":[107],"ability":[108],"to":[109,130,144,203],"establish":[110],"high-quality":[111],"correspondences":[112,134],"across":[113,135],"images,":[114],"from":[115],"which":[116],"accurate":[117],"derived.":[120],"To":[121],"achieve":[122],"this,":[123],"employ":[125],"transformer-based":[127],"neural":[128],"network":[129],"predict":[131],"long-term":[133],"frames":[136],"integrate":[138],"robust":[140],"Dynamic":[141],"Units":[142],"System":[143],"refine":[145],"these":[146],"predictions.":[147],"This":[148],"combination":[149],"facilitates":[150],"precise":[151],"pose":[152,207],"while":[154],"maintaining":[155],"both":[156,182],"efficiency":[157],"robustness,":[159],"even":[160],"under":[161],"conditions":[163],"as":[165],"disappearance,":[167],"reappearance,":[168],"handheld":[170],"motion.":[171],"effectiveness":[173],"our":[175,200],"has":[177],"been":[178],"rigorously":[179],"evaluated":[180],"qualitative":[183],"quantitative":[185],"analyses":[186],"on":[187],"OnePose,":[189],"YCB-V,":[190],"RBOT":[192],"datasets.":[193],"results":[195],"demonstrate":[196],"potential":[198],"proposed":[201],"redefine":[204],"real-world":[210]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
