{"id":"https://openalex.org/W4415536483","doi":"https://doi.org/10.1145/3746027.3755483","title":"Fine-grained Zero-Shot Object Detection","display_name":"Fine-grained Zero-Shot Object Detection","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415536483","doi":"https://doi.org/10.1145/3746027.3755483"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755483","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755483","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056459475","display_name":"Hongxu Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongxu Ma","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033526358","display_name":"Chenbo Zhang","orcid":"https://orcid.org/0000-0002-6773-598X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenbo Zhang","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100388587","display_name":"Lu Zhang","orcid":"https://orcid.org/0000-0001-9532-5219"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lu Zhang","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047121958","display_name":"Jiaogen Zhou","orcid":"https://orcid.org/0000-0003-1701-1489"},"institutions":[{"id":"https://openalex.org/I4210147117","display_name":"Huaiyin Normal University","ror":"https://ror.org/03xvggv44","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210147117"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaogen Zhou","raw_affiliation_strings":["Huaiyin Normal Univeristy, Huaian, China"],"affiliations":[{"raw_affiliation_string":"Huaiyin Normal Univeristy, Huaian, China","institution_ids":["https://openalex.org/I4210147117"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086316879","display_name":"Jihong Guan","orcid":"https://orcid.org/0000-0003-2313-7635"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jihong Guan","raw_affiliation_strings":["Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017862559","display_name":"Shuigeng Zhou","orcid":"https://orcid.org/0000-0002-1949-2768"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuigeng Zhou","raw_affiliation_strings":["Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5056459475"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30340851,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4504","last_page":"4513"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12386","display_name":"Advanced X-ray and CT Imaging","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12549","display_name":"Image and Object Detection Techniques","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.637499988079071},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6295999884605408},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6093000173568726},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5703999996185303},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.436599999666214},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4309000074863434},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4253999888896942}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6779999732971191},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6395000219345093},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.637499988079071},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6295999884605408},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6093000173568726},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5703999996185303},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5314000248908997},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.436599999666214},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4309000074863434},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4253999888896942},{"id":"https://openalex.org/C182521987","wikidata":"https://www.wikidata.org/wiki/Q2493877","display_name":"Viola\u2013Jones object detection framework","level":5,"score":0.38089999556541443},{"id":"https://openalex.org/C4641261","wikidata":"https://www.wikidata.org/wiki/Q11681085","display_name":"Face detection","level":4,"score":0.3671000003814697},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.3402000069618225},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2867000102996826},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2565999925136566}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755483","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755483","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1861492603","https://openalex.org/W2117539524","https://openalex.org/W2138011018","https://openalex.org/W2533598788","https://openalex.org/W2773003563","https://openalex.org/W2963936013","https://openalex.org/W3009340385","https://openalex.org/W3034974675","https://openalex.org/W3035220232","https://openalex.org/W3035655772","https://openalex.org/W3046554305","https://openalex.org/W3092529717","https://openalex.org/W3166091781","https://openalex.org/W3176164117","https://openalex.org/W4304084306","https://openalex.org/W4312394134","https://openalex.org/W4312933868","https://openalex.org/W4398202893","https://openalex.org/W4402727583"],"related_works":[],"abstract_inverted_index":{"Zero-shot":[0],"object":[1,26,51],"detection":[2,52],"(ZSD)":[3],"aims":[4,93],"to":[5,9,39,48,60,94],"leverage":[6],"semantic":[7,143],"descriptions":[8],"localize":[10],"and":[11,17,72,79,128,142,178],"recognize":[12],"objects":[13,96],"of":[14,69,97],"both":[15],"seen":[16],"unseen":[18],"classes.":[19],"Existing":[20],"ZSD":[21,107,148,191],"works":[22],"are":[23,31,36,57,150],"mainly":[24],"coarse-grained":[25],"detection,":[27],"where":[28,54],"the":[29,55,106,117,140,154,160],"classes":[30,56,99],"visually":[32],"quite":[33],"different,":[34],"thus":[35],"relatively":[37],"easy":[38],"distinguish.":[40],"However,":[41],"in":[42,103],"real":[43],"life":[44],"we":[45,77,158],"often":[46],"have":[47],"face":[49],"fine-grained":[50],"scenarios,":[53],"too":[58],"similar":[59],"be":[61],"easily":[62],"distinguished.":[63],"For":[64],"example,":[65],"detecting":[66],"different":[67,98],"kinds":[68],"birds,":[70],"fishes,":[71],"flowers.":[73],"In":[74],"this":[75],"paper,":[76],"propose":[78],"solve":[80],"a":[81,130],"new":[82,155],"problem":[83],"called":[84,114],"Fine-Grained":[85],"Zero-Shot":[86],"Object":[87],"Detection":[88],"(FG-ZSD":[89],"for":[90,116,153],"short),":[91],"which":[92,120,166],"detect":[95],"with":[100],"minute":[101],"differences":[102],"details":[104],"under":[105],"paradigm.":[108],"We":[109],"develop":[110],"an":[111,124],"effective":[112],"method":[113,188],"MSHC":[115],"FG-ZSD":[118,156,162],"task,":[119,157],"is":[121],"based":[122],"on":[123,183],"improved":[125],"two-stage":[126],"detector":[127],"employs":[129],"multi-level":[131],"semantics-aware":[132],"embedding":[133],"alignment":[134],"loss,":[135],"ensuring":[136],"tight":[137],"coupling":[138],"between":[139],"visual":[141],"spaces.":[144],"Considering":[145],"that":[146,186],"existing":[147,190],"datasets":[149],"not":[151],"suitable":[152],"build":[159],"first":[161],"benchmark":[163],"dataset":[164],"FGZSD-Birds,":[165],"contains":[167],"148,820":[168],"images":[169],"falling":[170],"into":[171],"36":[172],"orders,":[173],"140":[174],"families,":[175],"579":[176],"genera":[177],"1432":[179],"species.":[180],"Extensive":[181],"experiments":[182],"FGZSD-Birds":[184],"show":[185],"our":[187],"outperforms":[189],"models.":[192]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
