{"id":"https://openalex.org/W4399205998","doi":"https://doi.org/10.1109/tcsvt.2024.3407785","title":"Visual Grounding With Dual Knowledge Distillation","display_name":"Visual Grounding With Dual Knowledge Distillation","publication_year":2024,"publication_date":"2024-05-31","ids":{"openalex":"https://openalex.org/W4399205998","doi":"https://doi.org/10.1109/tcsvt.2024.3407785"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3407785","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3407785","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017356141","display_name":"Wansen Wu","orcid":"https://orcid.org/0000-0002-0467-3830"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wansen Wu","raw_affiliation_strings":["College of Systems Engineering, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Systems Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067002890","display_name":"Meng Cao","orcid":"https://orcid.org/0000-0002-8946-4228"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Cao","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053458494","display_name":"Yue Hu","orcid":"https://orcid.org/0000-0002-8115-7020"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Hu","raw_affiliation_strings":["College of Systems Engineering, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Systems Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106551871","display_name":"Yong Peng","orcid":"https://orcid.org/0000-0001-5803-7437"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Peng","raw_affiliation_strings":["College of Systems Engineering, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Systems Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016486530","display_name":"Long Qin","orcid":"https://orcid.org/0000-0003-1245-6622"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Qin","raw_affiliation_strings":["College of Systems Engineering, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Systems Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100610160","display_name":"Quanjun Yin","orcid":"https://orcid.org/0000-0002-1207-8660"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quanjun Yin","raw_affiliation_strings":["College of Systems Engineering, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Systems Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5017356141"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":2.4192,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.89881127,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"34","issue":"10","first_page":"10399","last_page":"10410"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.78504478931427},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6227705478668213},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5340463519096375},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5291113257408142},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5160400867462158},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.5082037448883057},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.47015881538391113},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4341239035129547},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3818318843841553},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32530301809310913},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.27413463592529297},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.09349757432937622},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09241777658462524}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78504478931427},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6227705478668213},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5340463519096375},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5291113257408142},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5160400867462158},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.5082037448883057},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.47015881538391113},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4341239035129547},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3818318843841553},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32530301809310913},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.27413463592529297},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.09349757432937622},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09241777658462524},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3407785","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3407785","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G230174192","display_name":null,"funder_award_id":"2023JJ40676","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3457895640","display_name":null,"funder_award_id":"62103425","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G461173008","display_name":null,"funder_award_id":"62103420","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4653491615","display_name":null,"funder_award_id":"62103428","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4911706857","display_name":null,"funder_award_id":"2021JJ40697","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7663206643","display_name":null,"funder_award_id":"2021JJ40702","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8092987286","display_name":null,"funder_award_id":"62306329","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1821462560","https://openalex.org/W1861492603","https://openalex.org/W2251512949","https://openalex.org/W2489434015","https://openalex.org/W2558535589","https://openalex.org/W2606473278","https://openalex.org/W2770129969","https://openalex.org/W2946086442","https://openalex.org/W2962764817","https://openalex.org/W2963109634","https://openalex.org/W2963735856","https://openalex.org/W2963914122","https://openalex.org/W2964022527","https://openalex.org/W2964345792","https://openalex.org/W2984121207","https://openalex.org/W2986803748","https://openalex.org/W2987734933","https://openalex.org/W3021007069","https://openalex.org/W3096609285","https://openalex.org/W3110435696","https://openalex.org/W3165695488","https://openalex.org/W3173364567","https://openalex.org/W3174004334","https://openalex.org/W3216551675","https://openalex.org/W4214490042","https://openalex.org/W4226452284","https://openalex.org/W4285283873","https://openalex.org/W4304098330","https://openalex.org/W4309181071","https://openalex.org/W4312274522","https://openalex.org/W4312351586","https://openalex.org/W4312377093","https://openalex.org/W4312845548","https://openalex.org/W4313011746","https://openalex.org/W4313145013","https://openalex.org/W4367050941","https://openalex.org/W4380303580","https://openalex.org/W4380609723","https://openalex.org/W4385245566","https://openalex.org/W4386066401","https://openalex.org/W4386076522","https://openalex.org/W4389053170","https://openalex.org/W6618372016","https://openalex.org/W6750227808","https://openalex.org/W6755207826","https://openalex.org/W6757135208","https://openalex.org/W6779613827","https://openalex.org/W6789753369","https://openalex.org/W6791353385","https://openalex.org/W6798350552","https://openalex.org/W6802517928","https://openalex.org/W6810124100","https://openalex.org/W6811013733"],"related_works":["https://openalex.org/W2750434199","https://openalex.org/W2129428289","https://openalex.org/W2347374138","https://openalex.org/W2050635624","https://openalex.org/W2101447046","https://openalex.org/W2091753323","https://openalex.org/W2168037874","https://openalex.org/W2135728080","https://openalex.org/W2905216489","https://openalex.org/W2119303512"],"abstract_inverted_index":{"Visual":[0],"grounding":[1,132,256],"is":[2],"a":[3,19,53,79,124,161,174,181,215,251],"task":[4],"that":[5],"seeks":[6],"to":[7,81,97,107,134,155,160,188,198,204,220,250],"predict":[8],"the":[9,26,62,74,83,88,93,103,113,136,148,152,157,166,185,194,200,205,234],"specific":[10],"location":[11],"of":[12,65,237,254],"an":[13,23],"object":[14],"or":[15],"region":[16,202],"described":[17],"by":[18,112,192],"linguistic":[20],"expression":[21],"within":[22],"image.":[24],"Despite":[25],"recent":[27,114],"success,":[28],"existing":[29,69],"methods":[30,38],"still":[31],"suffer":[32],"from":[33],"two":[34],"problems.":[35],"First,":[36],"most":[37],"use":[39],"independently":[40],"pre-trained":[41],"unimodal":[42,58],"feature":[43,48],"encoders":[44],"for":[45,130,184],"extracting":[46],"expressive":[47],"embeddings,":[49],"thus":[50],"resulting":[51],"in":[52,87,116,119,164],"significant":[54],"semantic":[55,94,138,158],"gap":[56,139],"between":[57,99],"embeddings":[59],"and":[60,102,140,168,230,239],"limiting":[61],"effective":[63],"interaction":[64],"visual-linguistic":[66],"contexts.":[67],"Second,":[68],"attention-based":[70],"approaches":[71],"equipped":[72],"with":[73,242],"global":[75],"receptive":[76],"field":[77],"have":[78],"tendency":[80],"neglect":[82],"local":[84],"information":[85],"present":[86],"images.":[89],"This":[90],"limitation":[91],"restricts":[92],"understanding":[95],"required":[96],"distinguish":[98],"referred":[100],"objects":[101],"background,":[104],"consequently":[105],"leading":[106],"inadequate":[108],"localization":[109,142,190],"performance.":[110],"Inspired":[111],"advance":[115],"knowledge":[117,159,191],"distillation,":[118],"this":[120,209,211],"paper,":[121],"we":[122,146,179],"propose":[123],"DUal":[125],"knowlEdge":[126],"disTillation":[127],"(DUET)":[128],"method":[129,183],"visual":[131,255],"models":[133],"bridge":[135],"cross-modal":[137],"improve":[141],"performance":[143,236],"simultaneously.":[144],"Specifically,":[145],"utilize":[147],"CLIP":[149],"model":[150,154,187],"as":[151],"teacher":[153],"transfer":[156],"student":[162,186,244],"model,":[163],"which":[165],"vision":[167],"language":[169],"modalities":[170],"are":[171,260],"linked":[172],"into":[173],"unified":[175],"embedding":[176],"space.":[177],"Besides,":[178],"design":[180],"self-distillation":[182,223],"acquire":[189],"performing":[193],"region-level":[195],"contrastive":[196],"learning":[197],"make":[199],"predicted":[201],"close":[203],"positive":[206],"samples.":[207,224],"To":[208],"end,":[210],"work":[212],"further":[213],"proposes":[214],"Semantics-Location":[216],"Aware":[217],"sampling":[218],"mechanism":[219],"generate":[221],"high-quality":[222],"Extensive":[225],"experiments":[226],"on":[227,262],"five":[228],"datasets":[229],"ablation":[231],"studies":[232],"demonstrate":[233],"state-of-the-art":[235],"DUET":[238,248],"its":[240],"orthogonality":[241],"different":[243],"models,":[245],"thereby":[246],"making":[247],"adaptable":[249],"wide":[252],"range":[253],"architectures.":[257],"Our":[258],"code":[259],"available":[261],"DUET.":[263]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
