{"id":"https://openalex.org/W4385338579","doi":"https://doi.org/10.1109/tnnls.2023.3293484","title":"Fine-Grained Visual\u2013Text Prompt-Driven Self-Training for Open-Vocabulary Object Detection","display_name":"Fine-Grained Visual\u2013Text Prompt-Driven Self-Training for Open-Vocabulary Object Detection","publication_year":2023,"publication_date":"2023-07-28","ids":{"openalex":"https://openalex.org/W4385338579","doi":"https://doi.org/10.1109/tnnls.2023.3293484","pmid":"https://pubmed.ncbi.nlm.nih.gov/37506020"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3293484","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3293484","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018425393","display_name":"Yanxin Long","orcid":"https://orcid.org/0000-0001-8784-8343"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanxin Long","raw_affiliation_strings":["School of Intelligent Systems Engineering, Sun Yat-sen University at Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Intelligent Systems Engineering, Sun Yat-sen University at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100297958","display_name":"Jianhua Han","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhua Han","raw_affiliation_strings":["Huawei Noah&#x2019;s Ark Lab, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x2019;s Ark Lab, Shanghai, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001888307","display_name":"Runhui Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runhui Huang","raw_affiliation_strings":["School of Intelligent Systems Engineering, Sun Yat-sen University at Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Intelligent Systems Engineering, Sun Yat-sen University at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102619815","display_name":"Hang Xu","orcid":"https://orcid.org/0009-0006-0421-0576"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hang Xu","raw_affiliation_strings":["Huawei Noah&#x2019;s Ark Lab, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x2019;s Ark Lab, Shanghai, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100434324","display_name":"Yi Zhu","orcid":"https://orcid.org/0000-0003-3000-3918"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Zhu","raw_affiliation_strings":["Huawei Noah&#x2019;s Ark Lab, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x2019;s Ark Lab, Shanghai, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101082712","display_name":"Chunjing Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunjing Xu","raw_affiliation_strings":["Huawei Noah&#x2019;s Ark Lab, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x2019;s Ark Lab, Shanghai, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047878798","display_name":"Xiaodan Liang","orcid":"https://orcid.org/0000-0003-3213-3062"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodan Liang","raw_affiliation_strings":["School of Intelligent Systems Engineering, Sun Yat-sen University at Shenzhen, Shenzhen, China","DarkMatter AI Research, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Intelligent Systems Engineering, Sun Yat-sen University at Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I157773358"]},{"raw_affiliation_string":"DarkMatter AI Research, Guangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5018425393"],"corresponding_institution_ids":["https://openalex.org/I157773358","https://openalex.org/I180726961"],"apc_list":null,"apc_paid":null,"fwci":2.455,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.91003162,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"35","issue":"11","first_page":"16277","last_page":"16287"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7045859098434448},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5992082357406616},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5447978377342224},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5254182815551758},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4410734176635742},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3352324962615967},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.15588179230690002}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7045859098434448},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5992082357406616},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5447978377342224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5254182815551758},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4410734176635742},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3352324962615967},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.15588179230690002},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3293484","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3293484","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:37506020","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37506020","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8299999833106995,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G3246663240","display_name":null,"funder_award_id":"76160-12220011","funder_id":"https://openalex.org/F4320321160","funder_display_name":"Sun Yat-sen University"},{"id":"https://openalex.org/G5604801150","display_name":null,"funder_award_id":"JCYJ20190807154211365","funder_id":"https://openalex.org/F4320329791","funder_display_name":"Shenzhen Fundamental Research Program"},{"id":"https://openalex.org/G5668754008","display_name":null,"funder_award_id":"22lgqb38","funder_id":"https://openalex.org/F4320321160","funder_display_name":"Sun Yat-sen University"}],"funders":[{"id":"https://openalex.org/F4320321160","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71"},{"id":"https://openalex.org/F4320329791","display_name":"Shenzhen Fundamental Research Program","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2031489346","https://openalex.org/W2187089797","https://openalex.org/W2747623286","https://openalex.org/W2796354110","https://openalex.org/W2948672349","https://openalex.org/W2962858109","https://openalex.org/W2963150697","https://openalex.org/W2963936013","https://openalex.org/W2968124245","https://openalex.org/W2970476646","https://openalex.org/W2983943451","https://openalex.org/W2985891137","https://openalex.org/W2997998901","https://openalex.org/W3034199269","https://openalex.org/W3046554305","https://openalex.org/W3088431063","https://openalex.org/W3090449556","https://openalex.org/W3109283569","https://openalex.org/W3129576130","https://openalex.org/W3134582802","https://openalex.org/W3166396011","https://openalex.org/W3172642864","https://openalex.org/W3173859428","https://openalex.org/W3174770825","https://openalex.org/W3176659256","https://openalex.org/W3185341429","https://openalex.org/W3198377975","https://openalex.org/W3200114289","https://openalex.org/W3201221410","https://openalex.org/W3204250462","https://openalex.org/W3205789812","https://openalex.org/W3206072662","https://openalex.org/W3212248244","https://openalex.org/W4226177592","https://openalex.org/W4283764477","https://openalex.org/W4284961860","https://openalex.org/W4288325606","https://openalex.org/W4294982692","https://openalex.org/W4295332732","https://openalex.org/W4312424618","https://openalex.org/W4312559104","https://openalex.org/W4312960937","https://openalex.org/W6620707391","https://openalex.org/W6752354721","https://openalex.org/W6767279747","https://openalex.org/W6782942446","https://openalex.org/W6790019176","https://openalex.org/W6791276965","https://openalex.org/W6791353385","https://openalex.org/W6792279967","https://openalex.org/W6798805250","https://openalex.org/W6800895557","https://openalex.org/W6801567822","https://openalex.org/W6801948084","https://openalex.org/W6802347785","https://openalex.org/W6802517928","https://openalex.org/W6804065392","https://openalex.org/W6810814029","https://openalex.org/W6841021368"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Inspired":[0],"by":[1,23],"the":[2,25,43,57,70,102,113,144,149,156,162,166,175],"success":[3],"of":[4,18,28,60,72,188],"vision-language":[5],"methods":[6],"(VLMs)":[7],"in":[8,37],"zero-shot":[9],"classification,":[10],"recent":[11],"works":[12],"attempt":[13],"to":[14,100,119,127,142,152,159,165],"extend":[15],"this":[16,75],"line":[17],"work":[19],"into":[20],"object":[21,66,180],"detection":[22,90],"leveraging":[24],"localization":[26],"ability":[27],"pretrained":[29,48,163],"VLMs":[30,45],"and":[31],"generating":[32],"pseudolabels":[33],"for":[34,65,88,155,178],"unseen":[35,186],"classes":[36,187],"a":[38,79,94,107,138],"self-training":[39,86,104],"manner.":[40],"However,":[41],"since":[42],"current":[44,103],"are":[46],"usually":[47],"with":[49,53,106],"aligning":[50],"sentence":[51],"embedding":[52],"global":[54],"image":[55],"embedding,":[56],"direct":[58],"use":[59],"them":[61],"lacks":[62],"fine-grained":[63,83,95,110,121],"alignment":[64,122],"instances,":[67],"which":[68],"is":[69],"core":[71],"detection.":[73],"In":[74],"article,":[76],"we":[77,116,136],"propose":[78,137],"simple":[80],"but":[81],"effective":[82],"visual-text":[84,96],"prompt-driven":[85],"paradigm":[87,105],"open-vocabulary":[89,179],"(VTP-OVD)":[91],"that":[92,171],"introduces":[93],"prompt":[97,140],"adapting":[98,114],"stage":[99],"enhance":[101],"more":[108],"powerful":[109],"alignment.":[111],"During":[112],"stage,":[115],"enable":[117],"VLM":[118,164],"obtain":[120],"using":[123],"learnable":[124],"text":[125],"prompts":[126],"resolve":[128],"an":[129],"auxiliary":[130],"dense":[131],"pixelwise":[132],"prediction":[133],"task.":[134],"Furthermore,":[135],"visual":[139],"module":[141],"provide":[143],"prior":[145],"task":[146],"information":[147],"(i.e.,":[148],"categories":[150],"need":[151],"be":[153],"predicted)":[154],"vision":[157],"branch":[158],"better":[160],"adapt":[161],"downstream":[167],"tasks.":[168],"Experiments":[169],"show":[170],"our":[172],"method":[173],"achieves":[174],"state-of-the-art":[176],"performance":[177],"detection,":[181],"e.g.,":[182],"31.5%":[183],"mAP":[184],"on":[185],"COCO.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":11}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
