{"id":"https://openalex.org/W7138100988","doi":"https://doi.org/10.1609/aaai.v40i14.38174","title":"VK-Det: Visual Knowledge Guided Prototype Learning for Open-Vocabulary Aerial Object Detection","display_name":"VK-Det: Visual Knowledge Guided Prototype Learning for Open-Vocabulary Aerial Object Detection","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138100988","doi":"https://doi.org/10.1609/aaai.v40i14.38174"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i14.38174","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38174","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38174/42136","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38174/42136","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125904303","display_name":"Jianhang Yao","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianhang Yao","raw_affiliation_strings":["National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046296571","display_name":"Yongbin Zheng","orcid":"https://orcid.org/0000-0002-3631-7929"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongbin Zheng","raw_affiliation_strings":["National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129660186","display_name":"Siqi Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siqi Lu","raw_affiliation_strings":["National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129704267","display_name":"Wanying Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanying Xu","raw_affiliation_strings":["National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129752067","display_name":"Peng Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Sun","raw_affiliation_strings":["National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5125904303"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.4077381,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"14","first_page":"11875","last_page":"11882"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8442000150680542,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8442000150680542,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.08070000261068344,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.041999999433755875,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7490000128746033},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.7175999879837036},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5443999767303467},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4846999943256378},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4510999917984009},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.41929998993873596},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4077000021934509},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.39010000228881836},{"id":"https://openalex.org/keywords/semantic-feature","display_name":"Semantic feature","score":0.37380000948905945}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7670999765396118},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7490000128746033},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.7175999879837036},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7098000049591064},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5443999767303467},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4846999943256378},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4659000039100647},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4510999917984009},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.41929998993873596},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4077000021934509},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.39010000228881836},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.37369999289512634},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3700000047683716},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3582000136375427},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33820000290870667},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.322299987077713},{"id":"https://openalex.org/C38785706","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Interest point detection","level":5,"score":0.299699991941452},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C182521987","wikidata":"https://www.wikidata.org/wiki/Q2493877","display_name":"Viola\u2013Jones object detection framework","level":5,"score":0.27320000529289246},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C105611402","wikidata":"https://www.wikidata.org/wiki/Q2976589","display_name":"Spectral clustering","level":3,"score":0.26600000262260437},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i14.38174","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38174","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38174/42136","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i14.38174","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38174","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38174/42136","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3085993365","display_name":null,"funder_award_id":"(Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4829830226","display_name":null,"funder_award_id":"62273353","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5190875283","display_name":null,"funder_award_id":"6227335","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138100988.pdf","grobid_xml":"https://content.openalex.org/works/W7138100988.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"To":[0],"identify":[1],"objects":[2,121],"beyond":[3],"predefined":[4],"categories,":[5],"open-vocabulary":[6,54,65],"aerial":[7],"object":[8,66],"detection":[9,67,108],"(OVAD)":[10],"leverages":[11],"the":[12],"zero-shot":[13],"capabilities":[14],"of":[15],"visual-language":[16],"models":[17,99],"(VLMs)":[18],"to":[19,23,36,40,56,83,110,119],"generalize":[20],"from":[21],"base":[22],"novel":[24,94,120],"categories.":[25],"Existing":[26],"approaches":[27],"typically":[28],"utilize":[29],"self-learning":[30],"mechanisms":[31],"with":[32,43],"weak":[33],"text":[34,48],"supervision":[35],"generate":[37],"region-level":[38],"pseudo-labels":[39],"align":[41],"detectors":[42],"VLMs":[44],"semantic":[45,51],"spaces.":[46],"However,":[47],"dependence":[49],"induces":[50],"bias,":[52],"restricting":[53],"expansion":[55],"text-specified":[57],"concepts.":[58],"We":[59],"propose":[60],"VK-Det,":[61],"a":[62,93],"visual":[63],"knowledge-guided":[64],"framework":[68],"without":[69],"extra":[70,144],"supervision.":[71,126],"First,":[72],"we":[73,91],"discover":[74],"and":[75,87,106,137],"leverage":[76],"vision":[77],"encoder's":[78],"inherent":[79],"informative":[80],"region":[81],"perception":[82],"attain":[84],"fine-grained":[85],"localization":[86],"adaptive":[88],"distillation.":[89],"Second,":[90],"introduce":[92],"prototype-aware":[95],"pseudo-labeling":[96],"strategy.":[97],"It":[98],"inter-class":[100],"decision":[101],"boundaries":[102],"through":[103],"feature":[104],"clustering":[105],"maps":[107],"regions":[109],"latent":[111],"categories":[112],"via":[113],"prototype":[114],"matching.":[115],"This":[116],"enhances":[117],"attention":[118],"while":[122],"compensating":[123],"for":[124],"missing":[125],"Extensive":[127],"experiments":[128],"show":[129],"state-of-the-art":[130],"performance,":[131],"achieving":[132],"30.1":[133],"mAP\u1d3a":[134,139],"on":[135,140],"DIOR":[136],"23.3":[138],"DOTA,":[141],"outperforming":[142],"even":[143],"supervised":[145],"methods.":[146]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-03-18T00:00:00"}
