{"id":"https://openalex.org/W4405717678","doi":"https://doi.org/10.1109/tip.2024.3518759","title":"Rebalanced Vision-Language Retrieval Considering Structure-Aware Distillation","display_name":"Rebalanced Vision-Language Retrieval Considering Structure-Aware Distillation","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4405717678","doi":"https://doi.org/10.1109/tip.2024.3518759","pmid":"https://pubmed.ncbi.nlm.nih.gov/40030596"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2024.3518759","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2024.3518759","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100397623","display_name":"Yang Yang","orcid":"https://orcid.org/0000-0002-5245-3584"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yang Yang","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112958262","display_name":"Wenjuan Xi","orcid":null},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjuan Xi","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100643784","display_name":"Luping Zhou","orcid":"https://orcid.org/0000-0001-8762-2424"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Luping Zhou","raw_affiliation_strings":["School of Electrical and Information Engineering, The University of Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035112538","display_name":"Jinhui Tang","orcid":"https://orcid.org/0000-0001-9008-222X"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinhui Tang","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100397623"],"corresponding_institution_ids":["https://openalex.org/I36399199"],"apc_list":null,"apc_paid":null,"fwci":0.2493,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.56929155,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"33","issue":null,"first_page":"6881","last_page":"6892"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6624327301979065},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5613049864768982},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5546702146530151},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.49820923805236816},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46918296813964844},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4397718012332916},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38258421421051025},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.32549816370010376},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2560684084892273},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.11201280355453491}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6624327301979065},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5613049864768982},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5546702146530151},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.49820923805236816},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46918296813964844},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4397718012332916},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38258421421051025},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32549816370010376},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2560684084892273},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.11201280355453491},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2024.3518759","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2024.3518759","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:40030596","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40030596","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G977746827","display_name":null,"funder_award_id":"30922010317","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W1997107867","https://openalex.org/W2048997552","https://openalex.org/W2064675550","https://openalex.org/W2155803963","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2467557055","https://openalex.org/W2962964995","https://openalex.org/W2963140444","https://openalex.org/W2964081303","https://openalex.org/W2970231061","https://openalex.org/W2986670728","https://openalex.org/W2988823324","https://openalex.org/W2989840903","https://openalex.org/W3005881764","https://openalex.org/W3035333188","https://openalex.org/W3035454331","https://openalex.org/W3035605030","https://openalex.org/W3104180728","https://openalex.org/W3107593279","https://openalex.org/W3110019360","https://openalex.org/W3111984153","https://openalex.org/W3118694826","https://openalex.org/W3138516171","https://openalex.org/W3159952799","https://openalex.org/W3176157254","https://openalex.org/W3187464273","https://openalex.org/W3213100861","https://openalex.org/W4206420686","https://openalex.org/W4224304134","https://openalex.org/W4226017195","https://openalex.org/W4282937884","https://openalex.org/W4283812943","https://openalex.org/W4288089799","https://openalex.org/W4312639100","https://openalex.org/W4312761738","https://openalex.org/W4321488152","https://openalex.org/W4365801687","https://openalex.org/W4377711491","https://openalex.org/W4386071757","https://openalex.org/W4402979766","https://openalex.org/W6684711245","https://openalex.org/W6695455153","https://openalex.org/W6747225742","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6766904570","https://openalex.org/W6769627184","https://openalex.org/W6788887552","https://openalex.org/W6791353385","https://openalex.org/W6796824943","https://openalex.org/W6797528369","https://openalex.org/W6798805250","https://openalex.org/W6804095316","https://openalex.org/W6809497388","https://openalex.org/W6809665764","https://openalex.org/W6811013733","https://openalex.org/W6839415613"],"related_works":["https://openalex.org/W3026162553","https://openalex.org/W2344382886","https://openalex.org/W19111321","https://openalex.org/W2412887479","https://openalex.org/W32245304","https://openalex.org/W2953684491","https://openalex.org/W4285338581","https://openalex.org/W2768175398","https://openalex.org/W2015158429","https://openalex.org/W2080290893"],"abstract_inverted_index":{"Vision-language":[0],"retrieval":[1,73,94,204,212],"aims":[2],"to":[3,21,46,58,117,141,215],"search":[4],"for":[5,92],"similar":[6],"instances":[7,102],"in":[8,26,66,103],"one":[9],"modality":[10,42,54],"based":[11],"on":[12,72,197],"queries":[13],"from":[14],"another":[15],"modality.":[16],"The":[17,68,99],"primary":[18],"objective":[19],"is":[20,37,89,107],"learn":[22],"cross-modal":[23,35,87,93,118,143,156,164,169,203],"matching":[24,36,88,144,148,157,165,170,185],"representations":[25,186,189],"a":[27,63,115,136,153],"latent":[28],"common":[29,64,105],"space.":[30],"Actually,":[31],"the":[32,48,104,127,163,168,175,180,191,201,216],"assumption":[33],"underlying":[34],"modal":[38,59],"balance,":[39],"where":[40],"each":[41],"contains":[43],"sufficient":[44],"information":[45],"represent":[47],"others.":[49],"However,":[50],"noise":[51],"interference":[52],"and":[53,187],"insufficiency":[55],"often":[56],"lead":[57],"imbalance,":[60],"making":[61],"it":[62],"phenomenon":[65],"practice.":[67],"impact":[69],"of":[70,101,129,206],"imbalance":[71],"performance":[74,205],"remains":[75],"an":[76],"open":[77],"question.":[78],"In":[79],"this":[80,123],"paper,":[81],"we":[82,125,134,151],"first":[83],"demonstrate":[84],"that":[85,158],"ultimate":[86],"generally":[90],"sub-optimal":[91],"when":[95,110],"imbalanced":[96,112],"modalities":[97],"exist.":[98],"structure":[100],"space":[106],"inherently":[108],"influenced":[109],"facing":[111],"modalities,":[113],"posing":[114],"challenge":[116],"similarity":[119],"measurement.":[120],"To":[121],"address":[122],"issue,":[124],"emphasize":[126],"importance":[128],"meaningful":[130],"structure-preserved":[131,147],"matching.":[132,194],"Accordingly,":[133],"propose":[135],"simple":[137],"yet":[138],"effective":[139],"method":[140],"rebalance":[142],"by":[145],"learning":[146],"representations.":[149],"Specifically,":[150],"design":[152],"novel":[154],"multi-granularity":[155],"incorporates":[159],"structure-aware":[160,176],"distillation":[161,177],"alongside":[162],"loss.":[166],"While":[167],"loss":[171],"constraints":[172],"instance-level":[173],"matching,":[174],"further":[178],"regularizes":[179],"geometric":[181],"consistency":[182],"between":[183],"learned":[184],"intra-modal":[188],"through":[190],"developed":[192],"relational":[193],"Extensive":[195],"experiments":[196],"different":[198],"datasets":[199],"affirm":[200],"superior":[202],"our":[207],"approach,":[208],"simultaneously":[209],"enhancing":[210],"single-modal":[211],"capabilities":[213],"compared":[214],"baseline":[217],"models.":[218]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
