{"id":"https://openalex.org/W4400726600","doi":"https://doi.org/10.1109/lra.2024.3430156","title":"Open-Vocabulary Category-Level Object Pose and Size Estimation","display_name":"Open-Vocabulary Category-Level Object Pose and Size Estimation","publication_year":2024,"publication_date":"2024-07-17","ids":{"openalex":"https://openalex.org/W4400726600","doi":"https://doi.org/10.1109/lra.2024.3430156"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2024.3430156","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3430156","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003897229","display_name":"Junhao Cai","orcid":"https://orcid.org/0000-0002-1440-0406"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Junhao Cai","raw_affiliation_strings":["The Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105667690","display_name":"Yisheng He","orcid":"https://orcid.org/0000-0002-4029-5494"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yisheng He","raw_affiliation_strings":["Institute for Intelligent Computing, Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Institute for Intelligent Computing, Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069033376","display_name":"Weihao Yuan","orcid":"https://orcid.org/0000-0002-1362-3747"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihao Yuan","raw_affiliation_strings":["Institute for Intelligent Computing, Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Institute for Intelligent Computing, Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001669712","display_name":"Siyu Zhu","orcid":"https://orcid.org/0000-0002-3214-3036"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyu Zhu","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019743637","display_name":"Zilong Dong","orcid":"https://orcid.org/0000-0002-6833-9102"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zilong Dong","raw_affiliation_strings":["Institute for Intelligent Computing, Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Institute for Intelligent Computing, Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085032007","display_name":"Liefeng Bo","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liefeng Bo","raw_affiliation_strings":["Institute for Intelligent Computing, Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Institute for Intelligent Computing, Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100719529","display_name":"Qifeng Chen","orcid":"https://orcid.org/0000-0003-2199-3948"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qifeng Chen","raw_affiliation_strings":["The Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5003897229"],"corresponding_institution_ids":["https://openalex.org/I200769079"],"apc_list":null,"apc_paid":null,"fwci":3.6415,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.93702407,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"9","issue":"9","first_page":"7661","last_page":"7668"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6355171799659729},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6320856809616089},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.6148557066917419},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.5686829686164856},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5581221580505371},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5356554388999939},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47142547369003296},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3399769961833954},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.27465546131134033},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07187497615814209}],"concepts":[{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6355171799659729},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6320856809616089},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.6148557066917419},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.5686829686164856},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5581221580505371},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5356554388999939},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47142547369003296},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3399769961833954},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.27465546131134033},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07187497615814209},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lra.2024.3430156","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3430156","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-143523","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-143523","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.75}],"awards":[{"id":"https://openalex.org/G4716406613","display_name":null,"funder_award_id":"2023A0505010004","funder_id":"https://openalex.org/F4320324202","funder_display_name":"Guangdong Science and Technology Department"}],"funders":[{"id":"https://openalex.org/F4320324202","display_name":"Guangdong Science and Technology Department","ror":"https://ror.org/00tjzgn92"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W1526868886","https://openalex.org/W1536680647","https://openalex.org/W2049981393","https://openalex.org/W2085261163","https://openalex.org/W2115579991","https://openalex.org/W2128019145","https://openalex.org/W2134236847","https://openalex.org/W2194775991","https://openalex.org/W2344474200","https://openalex.org/W2520803775","https://openalex.org/W2901412466","https://openalex.org/W2962783853","https://openalex.org/W2963177347","https://openalex.org/W2963188159","https://openalex.org/W2963892972","https://openalex.org/W3034597466","https://openalex.org/W3034986117","https://openalex.org/W3094502228","https://openalex.org/W3107372911","https://openalex.org/W3107992529","https://openalex.org/W3111487461","https://openalex.org/W3159481202","https://openalex.org/W3179923621","https://openalex.org/W3180355996","https://openalex.org/W3193686508","https://openalex.org/W3196328566","https://openalex.org/W3196466825","https://openalex.org/W3202459445","https://openalex.org/W4205105995","https://openalex.org/W4224598146","https://openalex.org/W4225435562","https://openalex.org/W4226321491","https://openalex.org/W4226322645","https://openalex.org/W4283775758","https://openalex.org/W4285048978","https://openalex.org/W4285102264","https://openalex.org/W4312251507","https://openalex.org/W4312296985","https://openalex.org/W4312359138","https://openalex.org/W4312747482","https://openalex.org/W4312819733","https://openalex.org/W4312933868","https://openalex.org/W4313134354","https://openalex.org/W4321377343","https://openalex.org/W4385245566","https://openalex.org/W4385805054","https://openalex.org/W4385814207","https://openalex.org/W4386066287","https://openalex.org/W4386075819","https://openalex.org/W4386113267","https://openalex.org/W4388755200","https://openalex.org/W4390873429","https://openalex.org/W4390873470","https://openalex.org/W4391547487","https://openalex.org/W6687484953","https://openalex.org/W6757817989","https://openalex.org/W6791353385","https://openalex.org/W6838639034","https://openalex.org/W6845903860","https://openalex.org/W6846007759","https://openalex.org/W6851800889","https://openalex.org/W6852690433","https://openalex.org/W6853660567","https://openalex.org/W6854266301"],"related_works":["https://openalex.org/W2123263858","https://openalex.org/W3127959533","https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W4387967917","https://openalex.org/W2601444686","https://openalex.org/W4307058054","https://openalex.org/W4386925306","https://openalex.org/W4387968151","https://openalex.org/W3132124459"],"abstract_inverted_index":{"This":[0,148],"letter":[1],"studies":[2],"a":[3,52,124],"new":[4],"open-set":[5],"problem,":[6],"the":[7,24,30,36,40,64,71,85,98,112,137,145,152,159,166,187,199],"open-vocabulary":[8,189],"category-level":[9,74],"object":[10,22,38,75,139],"pose":[11,76],"and":[12,33,66,77,130,158,162,182,201],"size":[13,34,78],"estimation.":[14,79],"Given":[15],"human":[16],"text":[17,175],"descriptions":[18,176],"of":[19,35,73,88,144,177,208],"arbitrary":[20],"novel":[21,178],"categories,":[23],"robot":[25],"agent":[26],"seeks":[27],"to":[28,106,135,173,205],"predict":[29],"position,":[31],"orientation,":[32],"target":[37,146],"in":[39,70,116],"observed":[41],"scene":[42],"image.":[43],"To":[44],"enable":[45],"such":[46,108],"generalizability,":[47],"we":[48,101],"first":[49],"introduce":[50],"OO3D-9D,":[51],"large-scale":[53,99,194],"photorealistic":[54],"dataset":[55,69],"for":[56,84],"this":[57],"task.":[58],"Derived":[59],"from":[60,97,156],"OmniObject3D,":[61],"OO3D-9D":[62],"is":[63,110],"largest":[65],"most":[67],"diverse":[68],"field":[72],"It":[80],"includes":[81],"additional":[82],"annotations":[83],"symmetry":[86],"axis":[87],"each":[89],"category,":[90],"which":[91,170],"help":[92],"resolve":[93],"symmetric":[94],"ambiguity.":[95],"Apart":[96],"dataset,":[100],"find":[102],"another":[103],"key":[104],"factor":[105],"enabling":[107],"generalizability":[109],"leveraging":[111],"strong":[113],"prior":[114,155],"knowledge":[115,164],"pre-trained":[117,128],"visual-language":[118],"foundation":[119],"models.":[120],"We":[121],"then":[122],"propose":[123],"framework":[125,149],"built":[126],"on":[127,192],"DinoV2":[129,157],"text-to-image":[131,167],"stable":[132],"diffusion":[133,168],"models":[134],"infer":[136],"normalized":[138],"coordinate":[140],"space":[141],"(NOCS)":[142],"maps":[143],"instances.":[147],"fully":[150],"leverages":[151],"visual":[153,161],"semantic":[154],"aligned":[160],"language":[163],"within":[165],"model,":[169],"enables":[171],"generalization":[172],"various":[174],"categories.":[179,210],"Comprehensive":[180],"quantitative":[181],"qualitative":[183],"experiments":[184],"demonstrate":[185],"that":[186],"proposed":[188],"method,":[190],"trained":[191],"our":[193],"synthesized":[195],"data,":[196],"significantly":[197],"outperforms":[198],"baseline":[200],"can":[202],"effectively":[203],"generalize":[204],"real-world":[206],"images":[207],"unseen":[209]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":9}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
