{"id":"https://openalex.org/W7164822722","doi":"https://doi.org/10.1145/3805622.3810841","title":"Beyond Post-hoc Fusion: Rethinking Cross-Modal Interaction Timing in Few-Shot Learning","display_name":"Beyond Post-hoc Fusion: Rethinking Cross-Modal Interaction Timing in Few-Shot Learning","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164822722","doi":"https://doi.org/10.1145/3805622.3810841"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810841","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810841","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810841","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5138685289","display_name":"Liang Yang","orcid":"https://orcid.org/0009-0000-0927-2991"},"institutions":[{"id":"https://openalex.org/I61057504","display_name":"Fujian Agriculture and Forestry University","ror":"https://ror.org/04kx2sy84","country_code":"CN","type":"education","lineage":["https://openalex.org/I61057504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Yang","raw_affiliation_strings":["College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China"],"raw_orcid":"https://orcid.org/0009-0000-0927-2991","affiliations":[{"raw_affiliation_string":"College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China","institution_ids":["https://openalex.org/I61057504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138664502","display_name":"Hongyuan Xiao","orcid":"https://orcid.org/0009-0006-7031-8614"},"institutions":[{"id":"https://openalex.org/I61057504","display_name":"Fujian Agriculture and Forestry University","ror":"https://ror.org/04kx2sy84","country_code":"CN","type":"education","lineage":["https://openalex.org/I61057504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyuan Xiao","raw_affiliation_strings":["College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China"],"raw_orcid":"https://orcid.org/0009-0006-7031-8614","affiliations":[{"raw_affiliation_string":"College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China","institution_ids":["https://openalex.org/I61057504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109395825","display_name":"Songtao He","orcid":null},"institutions":[{"id":"https://openalex.org/I61057504","display_name":"Fujian Agriculture and Forestry University","ror":"https://ror.org/04kx2sy84","country_code":"CN","type":"education","lineage":["https://openalex.org/I61057504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songtao He","raw_affiliation_strings":["College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China"],"raw_orcid":"https://orcid.org/0009-0006-7244-4817","affiliations":[{"raw_affiliation_string":"College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China","institution_ids":["https://openalex.org/I61057504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138630192","display_name":"Ye Lin","orcid":"https://orcid.org/0009-0002-1426-9644"},"institutions":[{"id":"https://openalex.org/I61057504","display_name":"Fujian Agriculture and Forestry University","ror":"https://ror.org/04kx2sy84","country_code":"CN","type":"education","lineage":["https://openalex.org/I61057504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Lin","raw_affiliation_strings":["College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China"],"raw_orcid":"https://orcid.org/0009-0002-1426-9644","affiliations":[{"raw_affiliation_string":"College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China","institution_ids":["https://openalex.org/I61057504"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025541045","display_name":"Z. Zhang","orcid":"https://orcid.org/0000-0002-8782-9414"},"institutions":[{"id":"https://openalex.org/I61057504","display_name":"Fujian Agriculture and Forestry University","ror":"https://ror.org/04kx2sy84","country_code":"CN","type":"education","lineage":["https://openalex.org/I61057504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenchang Zhang","raw_affiliation_strings":["College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-8782-9414","affiliations":[{"raw_affiliation_string":"College of Computer and Information Sciences, Fujian Agriculture and Forestry University, Fuzhou, China","institution_ids":["https://openalex.org/I61057504"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.9357255,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"986","last_page":"995"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7864000201225281,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7864000201225281,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.12890000641345978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.014800000004470348,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5597000122070312},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5060999989509583},{"id":"https://openalex.org/keywords/semantic-feature","display_name":"Semantic feature","score":0.5013999938964844},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4408000111579895},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.42080000042915344},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.39089998602867126},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.38920000195503235},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.36160001158714294},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.3377000093460083}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7159000039100647},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.636900007724762},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5597000122070312},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5060999989509583},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.5013999938964844},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49619999527931213},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4408000111579895},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.42080000042915344},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.38920000195503235},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.36160001158714294},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.32670000195503235},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.31610000133514404},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C198942812","wikidata":"https://www.wikidata.org/wiki/Q496618","display_name":"Semantic property","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25189998745918274},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810841","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810841","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810841","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810841","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6650741100311279,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1977295328","https://openalex.org/W2017814585","https://openalex.org/W2047643928","https://openalex.org/W2108598243","https://openalex.org/W2138011018","https://openalex.org/W2194775991","https://openalex.org/W2533598788","https://openalex.org/W2619383789","https://openalex.org/W2963140444","https://openalex.org/W2964194231","https://openalex.org/W2997573100","https://openalex.org/W3065974826","https://openalex.org/W3108975329","https://openalex.org/W3145450063","https://openalex.org/W3159481202","https://openalex.org/W3198377975","https://openalex.org/W4312310776","https://openalex.org/W4312639100","https://openalex.org/W4313156423","https://openalex.org/W4313175608","https://openalex.org/W4319300023","https://openalex.org/W4382458283","https://openalex.org/W4386071547","https://openalex.org/W4386075985","https://openalex.org/W4386790226","https://openalex.org/W4390874269","https://openalex.org/W4402727091","https://openalex.org/W4402727780","https://openalex.org/W4409367907","https://openalex.org/W4413145066","https://openalex.org/W4413156952"],"related_works":[],"abstract_inverted_index":{"Pre-trained":[0],"vision\u2013language":[1],"models":[2],"(VLMs),":[3],"such":[4],"as":[5],"CLIP,":[6],"have":[7],"become":[8],"a":[9,26],"strong":[10],"foundation":[11],"for":[12],"few-shot":[13],"learning.":[14],"Despite":[15],"this":[16,67],"success,":[17],"most":[18],"existing":[19],"adaptation":[20],"methods":[21],"restrict":[22],"cross-modal":[23,77,116],"interaction":[24,78],"to":[25,82,104],"post-hoc":[27],"stage,":[28,53],"where":[29],"visual":[30,94,135],"and":[31,37,107,114,134],"semantic":[32,45,99,102],"representations":[33],"are":[34,155],"extracted":[35],"independently":[36],"fused":[38],"only":[39,49],"at":[40,50],"the":[41,51,54,126],"logit":[42],"level.":[43],"When":[44],"guidance":[46],"is":[47],"applied":[48],"decision":[52],"underlying":[55],"feature":[56,109],"space":[57],"remains":[58],"largely":[59],"unchanged,":[60],"limiting":[61],"effective":[62],"task-specific":[63],"adaptation.":[64],"To":[65,111],"address":[66],"limitation,":[68],"we":[69,118],"propose":[70],"Dynamic":[71],"Cross-Modal":[72],"Interplay":[73],"(DCI),":[74],"which":[75,92,124],"advances":[76],"from":[79],"reactive":[80],"fusion":[81],"proactive":[83],"representation":[84,95],"construction.":[85],"DCI":[86],"introduces":[87],"Semantic-Guided":[88],"Visual":[89],"Calibration":[90],"(SGVC),":[91],"conditions":[93],"construction":[96],"on":[97,139,157],"instance-adaptive":[98],"anchors,":[100],"enabling":[101],"priors":[103],"directly":[105],"modulate":[106],"refine":[108],"geometry.":[110],"ensure":[112],"stable":[113],"balanced":[115],"interaction,":[117],"further":[119],"incorporate":[120],"Structure-Aware":[121],"Regularization":[122],"(SAR),":[123],"regularizes":[125],"calibration":[127],"process":[128],"by":[129],"preserving":[130],"inter-sample":[131],"relational":[132],"geometry":[133],"discriminability.":[136],"Extensive":[137],"experiments":[138],"several":[140],"widely":[141],"used":[142],"benchmarks":[143],"demonstrate":[144],"consistent":[145],"improvements":[146,154],"over":[147],"state-of-the-art":[148],"methods.":[149],"Further":[150],"analysis":[151],"shows":[152],"that":[153],"concentrated":[156],"semantically":[158],"ambiguous":[159],"samples.":[160]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
