{"id":"https://openalex.org/W4408564107","doi":"https://doi.org/10.1109/tcsvt.2025.3552596","title":"Toward Realistic Hierarchical Object Detection: Problem, Benchmark, and Solution","display_name":"Toward Realistic Hierarchical Object Detection: Problem, Benchmark, and Solution","publication_year":2025,"publication_date":"2025-03-18","ids":{"openalex":"https://openalex.org/W4408564107","doi":"https://doi.org/10.1109/tcsvt.2025.3552596"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3552596","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3552596","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102635181","display_name":"Juexiao Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Juexiao Feng","raw_affiliation_strings":["National Research Center for Information Science and Technology (BNRist), School of Software, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-4614-8607","affiliations":[{"raw_affiliation_string":"National Research Center for Information Science and Technology (BNRist), School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102859655","display_name":"Yuhong Yang","orcid":"https://orcid.org/0000-0003-3001-7957"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhong Yang","raw_affiliation_strings":["National Research Center for Information Science and Technology (BNRist), School of Software, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Research Center for Information Science and Technology (BNRist), School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113022344","display_name":"Mengyao Lyu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengyao Lyu","raw_affiliation_strings":["National Research Center for Information Science and Technology (BNRist), School of Software, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Research Center for Information Science and Technology (BNRist), School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019176668","display_name":"Tianxiang Hao","orcid":"https://orcid.org/0000-0002-1952-6083"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianxiang Hao","raw_affiliation_strings":["National Research Center for Information Science and Technology (BNRist), School of Software, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1952-6083","affiliations":[{"raw_affiliation_string":"National Research Center for Information Science and Technology (BNRist), School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yi-Jie Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi-Jie Huang","raw_affiliation_strings":["OPPO Research Institute, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OPPO Research Institute, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101259621","display_name":"Yanchun Xie","orcid":"https://orcid.org/0000-0002-1410-7082"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanchun Xie","raw_affiliation_strings":["OPPO Research Institute, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OPPO Research Institute, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100697598","display_name":"Yaqian Li","orcid":"https://orcid.org/0000-0003-1032-9910"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaqian Li","raw_affiliation_strings":["OPPO Research Institute, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OPPO Research Institute, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046605531","display_name":"Jungong Han","orcid":"https://orcid.org/0000-0003-4361-956X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jungong Han","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4361-956X","affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077408167","display_name":"Liuyu Xiang","orcid":"https://orcid.org/0000-0001-8486-6255"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liuyu Xiang","raw_affiliation_strings":["School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8486-6255","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057732142","display_name":"Guiguang Ding","orcid":"https://orcid.org/0000-0003-0137-9975"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guiguang Ding","raw_affiliation_strings":["OPPO Research Institute, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0137-9975","affiliations":[{"raw_affiliation_string":"OPPO Research Institute, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5102635181"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03657143,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"35","issue":"9","first_page":"9351","last_page":"9364"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.680899977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.680899977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6761658787727356},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6042139530181885},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4525737464427948},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44355595111846924},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.43644237518310547},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.28647226095199585}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6761658787727356},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6042139530181885},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4525737464427948},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44355595111846924},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.43644237518310547},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28647226095199585},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3552596","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3552596","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1099563269","display_name":null,"funder_award_id":"No. 62301066","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3625099708","display_name":null,"funder_award_id":"2022B01006","funder_id":"https://openalex.org/F4320334010","funder_display_name":"Key Research and Development Program of Ningxia"},{"id":"https://openalex.org/G4800139155","display_name":null,"funder_award_id":"62301066","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7091651887","display_name":null,"funder_award_id":"62021002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8416519430","display_name":null,"funder_award_id":"Nos. 61925107","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334010","display_name":"Key Research and Development Program of Ningxia","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1982522767","https://openalex.org/W2010132303","https://openalex.org/W2031489346","https://openalex.org/W2081580037","https://openalex.org/W2116339064","https://openalex.org/W2528206015","https://openalex.org/W2570343428","https://openalex.org/W2893642647","https://openalex.org/W2893703358","https://openalex.org/W2965373594","https://openalex.org/W2983943451","https://openalex.org/W3035406632","https://openalex.org/W3108070979","https://openalex.org/W3199622762","https://openalex.org/W4288083516","https://openalex.org/W4288325606","https://openalex.org/W4304084306","https://openalex.org/W4312424618","https://openalex.org/W4312563428","https://openalex.org/W4312628289","https://openalex.org/W4312689172","https://openalex.org/W4312747482","https://openalex.org/W4312839074","https://openalex.org/W4312956471","https://openalex.org/W4321780048","https://openalex.org/W4390871915","https://openalex.org/W4390872427","https://openalex.org/W4401070169","https://openalex.org/W4411245287"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W4390721878","https://openalex.org/W4312842780","https://openalex.org/W2883677709","https://openalex.org/W4292830139","https://openalex.org/W4319309705"],"abstract_inverted_index":{"With":[0],"the":[1,29,68,82,87,106,150,172,188,205],"continuous":[2],"advancement":[3],"of":[4,19,31,84,152,168,190],"deep":[5],"learning,":[6],"object":[7,20,108,130],"detection":[8,49,131],"has":[9,158],"made":[10],"remarkable":[11],"progress":[12],"in":[13,86,154,161],"accurately":[14],"identifying":[15],"a":[16,39,91,115,127,166],"wide":[17],"range":[18,167],"categories,":[21],"even":[22],"within":[23],"increasingly":[24],"complex":[25],"scenes.":[26],"However,":[27],"as":[28],"number":[30],"categories":[32],"grows,":[33],"visual":[34],"concepts":[35],"naturally":[36],"organize":[37],"into":[38],"label":[40],"hierarchy.":[41],"We":[42,164],"contend":[43],"that":[44,133,198],"existing":[45,169],"hierarchical":[46,107,129],"classification":[47],"and":[48,141,144,178],"methods":[50,170],"predominantly":[51],"prioritize":[52],"fine-grained":[53,155],"prediction,":[54],"potentially":[55],"leading":[56],"to":[57,74,186],"inconsistencies":[58],"with":[59,77,97],"realistic":[60,111],"human":[61],"perception.":[62,79],"From":[63],"this":[64],"perspective,":[65],"we":[66,89,113,125,182],"investigate":[67],"Hierarchical":[69,120,142],"Object":[70],"Detection":[71],"(HOD)":[72],"problem":[73],"better":[75,104],"align":[76,105],"real-world":[78],"To":[80,103],"address":[81],"lack":[83],"benchmarks":[85],"field,":[88],"build":[90],"large-scale":[92],"HOD":[93],"benchmark":[94],"termed":[95],"RHOD":[96,173,206],"open-source":[98],"datasets,":[99],"comprising":[100],"740":[101],"categories.":[102],"detectors":[109],"towards":[110],"perception,":[112],"propose":[114],"new":[116],"evaluation":[117],"metric":[118],"named":[119],"Average":[121],"Precision":[122],"(HAP).":[123],"Furthermore,":[124],"present":[126],"novel":[128],"method":[132,148,200],"includes":[134],"two":[135],"components,":[136],"Tree":[137],"Soft":[138],"Labeling":[139],"(TSL)":[140],"Extension":[143],"Suppression":[145],"(HES).":[146],"Our":[147],"mitigates":[149],"issue":[151],"overconfidence":[153],"predictions,":[156],"which":[157],"been":[159],"prevalent":[160],"previous":[162],"approaches.":[163],"evaluate":[165],"on":[171,204],"benchmark,":[174],"including":[175],"plain,":[176],"hierarchical,":[177],"open-vocabulary":[179],"models.":[180],"Additionally,":[181],"perform":[183],"comprehensive":[184],"experiments":[185],"assess":[187],"performance":[189,203],"our":[191,199],"proposed":[192],"method.":[193],"The":[194],"experimental":[195],"results":[196],"show":[197],"achieves":[201],"state-of-the-art":[202],"benchmark.":[207]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-03-19T00:00:00"}
