{"id":"https://openalex.org/W4403780846","doi":"https://doi.org/10.1145/3664647.3681559","title":"WaveDN: A Wavelet-based Training-free Zero-shot Enhancement for Vision-Language Models","display_name":"WaveDN: A Wavelet-based Training-free Zero-shot Enhancement for Vision-Language Models","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403780846","doi":"https://doi.org/10.1145/3664647.3681559"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681559","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681559","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113662002","display_name":"Jiayan Li","orcid":"https://orcid.org/0000-0001-7497-712X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiulin Li","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101690665","display_name":"Mengyu Yang","orcid":"https://orcid.org/0000-0001-7832-0926"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengyu Yang","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101561788","display_name":"Ye Tian","orcid":"https://orcid.org/0000-0002-6683-5524"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Tian","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051221338","display_name":"Lanshan Zhang","orcid":"https://orcid.org/0000-0002-0674-7864"},"institutions":[{"id":"https://openalex.org/I4210108629","display_name":"Computer Network Information Center","ror":"https://ror.org/01s0wyf50","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210108629"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lanshan Zhang","raw_affiliation_strings":["Beijing Key Laboratory of Network System and Network Culture, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Network System and Network Culture, Beijing, China","institution_ids":["https://openalex.org/I4210108629"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110238659","display_name":"Yongchun Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongchun Lu","raw_affiliation_strings":["School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012161482","display_name":"Jinsheng Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jice Liu","raw_affiliation_strings":["School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100329067","display_name":"Wendong Wang","orcid":"https://orcid.org/0000-0002-6418-8087"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wendong Wang","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5113662002"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.5499,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.671949,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4273","last_page":"4282"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.6840385794639587},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6598536968231201},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6440414786338806},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6431640386581421},{"id":"https://openalex.org/keywords/wavelet","display_name":"Wavelet","score":0.5547705888748169},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5260357856750488},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.46677106618881226},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.07069993019104004},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.06968393921852112}],"concepts":[{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.6840385794639587},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6598536968231201},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6440414786338806},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6431640386581421},{"id":"https://openalex.org/C47432892","wikidata":"https://www.wikidata.org/wiki/Q831390","display_name":"Wavelet","level":2,"score":0.5547705888748169},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5260357856750488},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.46677106618881226},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07069993019104004},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.06968393921852112},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681559","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681559","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1846799578","https://openalex.org/W1905882502","https://openalex.org/W2017814585","https://openalex.org/W2038484192","https://openalex.org/W2047643928","https://openalex.org/W2108598243","https://openalex.org/W2138011018","https://openalex.org/W2194775991","https://openalex.org/W2533598788","https://openalex.org/W2568262903","https://openalex.org/W2753192188","https://openalex.org/W2938830017","https://openalex.org/W2964194231","https://openalex.org/W3118410762","https://openalex.org/W3118608800","https://openalex.org/W3135367836","https://openalex.org/W3166396011","https://openalex.org/W3176445421","https://openalex.org/W3176663332","https://openalex.org/W3184456930","https://openalex.org/W3184735396","https://openalex.org/W4212774754","https://openalex.org/W4297808394","https://openalex.org/W4304092183","https://openalex.org/W4312877428","https://openalex.org/W4382458283","https://openalex.org/W4385801050","https://openalex.org/W4386057724","https://openalex.org/W4386075561","https://openalex.org/W4386075814","https://openalex.org/W4387969733","https://openalex.org/W4390872642","https://openalex.org/W4390874269","https://openalex.org/W4392341509","https://openalex.org/W4398958419","https://openalex.org/W6800895557"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W4298312966","https://openalex.org/W2325697621"],"abstract_inverted_index":{"Vision-Language":[0],"Models":[1],"(VLMs)":[2],"built":[3],"on":[4,40,79,100,167],"contrastive":[5],"learning,":[6],"such":[7],"as":[8],"CLIP,":[9],"demonstrate":[10],"great":[11],"transferability":[12,76],"and":[13,21,64,179],"excel":[14],"in":[15,49],"downstream":[16,41,55,101,169],"tasks":[17,56,102],"like":[18],"zero-shot":[19],"classification":[20],"retrieval.":[22],"To":[23,82],"further":[24],"enhance":[25],"the":[26,65,74,97,115,118,130,138,142,146,156,162],"performance":[27,99],"of":[28,67,77,117,133,141,158,174],"VLMs,":[29],"existing":[30,75],"methods":[31,45],"have":[32],"introduced":[33],"additional":[34,68],"parameter":[35,69],"modules":[36,70,105],"or":[37,60,106],"fine-tuned":[38],"VLMs":[39,78],"datasets.":[42],"However,":[43],"these":[44],"often":[46],"fall":[47],"short":[48],"scenarios":[50],"where":[51],"labeled":[52,107],"data":[53],"for":[54,62],"is":[57],"either":[58],"unavailable":[59],"insufficient":[61],"fine-tuning,":[63],"training":[66],"may":[71],"considerably":[72],"impair":[73],"open-set":[80],"tasks.":[81],"alleviate":[83],"this":[84],"issue,":[85],"we":[86],"introduce":[87],"WaveDN,":[88],"a":[89,126,172],"wavelet-based":[90],"distribution":[91],"normalization":[92,128],"method":[93],"that":[94],"can":[95],"boost":[96],"VLMs'":[98],"without":[103],"parametric":[104],"data.":[108,144],"Initially,":[109],"wavelet":[110,131,153],"distributions":[111],"are":[112,149],"extracted":[113],"from":[114],"embeddings":[116,148],"sampled,":[119],"unlabeled":[120],"test":[121,143],"samples.":[122,163],"Subsequently,":[123],"WaveDN":[124,183],"conducts":[125],"hierarchical":[127],"across":[129],"coefficients":[132],"all":[134],"embeddings,":[135],"thereby":[136],"incorporating":[137],"distributional":[139],"characteristics":[140],"Finally,":[145],"normalized":[147],"reconstructed":[150],"via":[151],"inverse":[152],"transformation,":[154],"facilitating":[155],"computation":[157],"similarity":[159],"metrics":[160],"between":[161],"Through":[164],"extensive":[165],"experiments":[166],"two":[168],"tasks,":[170],"using":[171],"total":[173],"14":[175],"datasets":[176],"covering":[177],"text-image":[178],"text-audio":[180],"modal":[181],"data,":[182],"has":[184],"demonstrated":[185],"superiority":[186],"compared":[187],"to":[188],"state-of-the-art":[189],"methods.":[190]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
