{"id":"https://openalex.org/W7164850190","doi":"https://doi.org/10.1145/3805622.3810643","title":"CCRA: A Cross-modal Complementary Representation Alignment Framework for Bridging the Modality Gap","display_name":"CCRA: A Cross-modal Complementary Representation Alignment Framework for Bridging the Modality Gap","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164850190","doi":"https://doi.org/10.1145/3805622.3810643"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810643","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810643","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810643","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5138629039","display_name":"Xingchen Han","orcid":"https://orcid.org/0009-0002-2005-6899"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingchen Han","raw_affiliation_strings":["Northeastern University, shenyang, China"],"raw_orcid":"https://orcid.org/0009-0002-2005-6899","affiliations":[{"raw_affiliation_string":"Northeastern University, shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022027430","display_name":"Ruihao Zhang","orcid":"https://orcid.org/0000-0002-3562-2382"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruihao Zhang","raw_affiliation_strings":["Northeastern University, shenyang, China"],"raw_orcid":"https://orcid.org/0009-0008-3180-0218","affiliations":[{"raw_affiliation_string":"Northeastern University, shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138676318","display_name":"Ruiting Li","orcid":"https://orcid.org/0009-0001-4239-3880"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruiting Li","raw_affiliation_strings":["Northeastern University, shenyang, China"],"raw_orcid":"https://orcid.org/0009-0001-4239-3880","affiliations":[{"raw_affiliation_string":"Northeastern University, shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021682545","display_name":"Y. P. Pei","orcid":"https://orcid.org/0000-0002-5913-1641"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingxin Pei","raw_affiliation_strings":["Northeastern University, shenyang, China"],"raw_orcid":"https://orcid.org/0009-0002-0913-8367","affiliations":[{"raw_affiliation_string":"Northeastern University, shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108047889","display_name":"Yi-Xiang Wang","orcid":"https://orcid.org/0000-0001-5697-0717"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaqi Wang","raw_affiliation_strings":["Northeastern University, shenyang, China"],"raw_orcid":"https://orcid.org/0009-0009-7389-8396","affiliations":[{"raw_affiliation_string":"Northeastern University, shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011264334","display_name":"Zhe Ji","orcid":"https://orcid.org/0000-0001-8011-9033"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhe Ji","raw_affiliation_strings":["Northeastern University, shenyang, China"],"raw_orcid":"https://orcid.org/0009-0009-9610-5790","affiliations":[{"raw_affiliation_string":"Northeastern University, shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080144393","display_name":"Feiliang Ren","orcid":"https://orcid.org/0000-0001-6824-1191"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feiliang Ren","raw_affiliation_strings":["Northeastern University, shenyang, China"],"raw_orcid":"https://orcid.org/0000-0001-6824-1191","affiliations":[{"raw_affiliation_string":"Northeastern University, shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102832508","display_name":"Yongkang Liu","orcid":"https://orcid.org/0000-0003-3098-0225"},"institutions":[{"id":"https://openalex.org/I4210152629","display_name":"Eastern University","ror":"https://ror.org/05e2ncr14","country_code":"BD","type":"education","lineage":["https://openalex.org/I4210152629"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Yongkang Liu","raw_affiliation_strings":["Northeastern University, qinhuangdao, China"],"raw_orcid":"https://orcid.org/0000-0003-3098-0225","affiliations":[{"raw_affiliation_string":"Northeastern University, qinhuangdao, China","institution_ids":["https://openalex.org/I4210152629"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93894112,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"223","last_page":"232"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.965399980545044,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.965399980545044,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.006399999838322401,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.0052999998442828655,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.7484999895095825},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.6953999996185303},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6626999974250793},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5936999917030334},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5878999829292297},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5799000263214111},{"id":"https://openalex.org/keywords/complementarity","display_name":"Complementarity (molecular biology)","score":0.48339998722076416},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.46459999680519104},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4553999900817871}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7559999823570251},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.7484999895095825},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.6953999996185303},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6626999974250793},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5936999917030334},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5878999829292297},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5871000289916992},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5799000263214111},{"id":"https://openalex.org/C202269582","wikidata":"https://www.wikidata.org/wiki/Q2644277","display_name":"Complementarity (molecular biology)","level":2,"score":0.48339998722076416},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.46459999680519104},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4553999900817871},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.392300009727478},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.3783000111579895},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37049999833106995},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.3700999915599823},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35030001401901245},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.3287000060081482},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.3246000111103058},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.30300000309944153},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.28299999237060547},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2809999883174896},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2547999918460846},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.2526000142097473},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810643","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810643","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810643","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810643","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7694810628890991,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2027461913","https://openalex.org/W2964187781","https://openalex.org/W4283702870","https://openalex.org/W4312504063","https://openalex.org/W4312584871","https://openalex.org/W4312933868","https://openalex.org/W4321480073","https://openalex.org/W4383877072","https://openalex.org/W4386065512","https://openalex.org/W4386065600","https://openalex.org/W4386065863","https://openalex.org/W4386072441","https://openalex.org/W4386075606","https://openalex.org/W4392172801","https://openalex.org/W4402727764","https://openalex.org/W4402753983","https://openalex.org/W4404136215","https://openalex.org/W4412378716","https://openalex.org/W4412944831","https://openalex.org/W4413145351","https://openalex.org/W4413146477","https://openalex.org/W4415433158","https://openalex.org/W7133186580","https://openalex.org/W7133188672","https://openalex.org/W7133193597","https://openalex.org/W7133213514","https://openalex.org/W7133217898","https://openalex.org/W7133220107"],"related_works":[],"abstract_inverted_index":{"Contrastive":[0],"vision-language":[1,12],"pre-training":[2],"models":[3,104],"such":[4],"as":[5],"CLIP":[6,154],"have":[7],"achieved":[8],"remarkable":[9],"progress":[10],"in":[11,20,77],"understanding.":[13],"However,":[14,59],"a":[15,98,124,142,152,162],"modality":[16,86,89,117,212],"gap":[17,29,49,87,213],"still":[18],"exists":[19],"the":[21,28,38,56,63,85,105,116,196,211,221],"shared":[22,57],"embedding":[23],"space,":[24],"which":[25,73,129,147],"refers":[26],"to":[27,46,70,157],"between":[30,65,107],"image":[31],"and":[32,67,132,137,179,191,202,214],"text":[33],"feature":[34,218],"clusters,":[35],"ultimately":[36],"limiting":[37],"performance":[39],"of":[40,120,187],"downstream":[41],"tasks.Most":[42],"existing":[43],"studies":[44],"attempt":[45],"alleviate":[47],"this":[48],"by":[50],"directly":[51],"pulling":[52],"embeddings":[53],"closer":[54],"within":[55],"space.":[58],"they":[60],"often":[61],"overlook":[62],"balance":[64],"alignment":[66],"distinctiveness,":[68],"leading":[69],"degraded":[71],"uniformity,":[72],"can":[74],"easily":[75],"result":[76],"representation":[78],"collapse.":[79],"Our":[80],"approach":[81],"focuses":[82],"on":[83,220],"addressing":[84],"from":[88,151],"distinctiveness.":[90],"We":[91],"propose":[92],"CCRA":[93,165,208],"(Cross-modal":[94],"Complementary":[95,126],"Representation":[96],"Alignment),":[97],"lightweight":[99],"post-alignment":[100],"framework":[101],"that":[102,207],"explicitly":[103],"complementarity":[106],"modalities":[108],"while":[109],"maintaining":[110],"semantic":[111,135],"consistency,":[112],"thereby":[113],"effectively":[114,209],"alleviating":[115],"gap.CCRA":[118],"consists":[119],"two":[121,174],"modules:":[122],"(1)":[123],"Feature":[125],"Network":[127],"(FCN),":[128],"dynamically":[130],"extracts":[131],"fuses":[133],"complementary":[134],"cues":[136],"learnable":[138],"query":[139],"vectors;":[140],"(2)":[141],"Distillation":[143],"Enhancement":[144],"Module":[145],"(DEM),":[146],"transfers":[148],"similarity":[149],"distributions":[150,219],"frozen":[153],"teacher":[155],"model":[156],"maintain":[158],"training":[159],"stability":[160],"through":[161],"distillation":[163],"constraint.":[164],"achieves":[166],"consistent":[167],"improvements":[168],"over":[169],"five":[170],"representative":[171],"baselines":[172],"across":[173],"cross-modal":[175],"retrieval":[176],"benchmarks":[177],"(COCO2017-Val":[178],"Flickr8k/30k).":[180],"On":[181],"COCO2017-Val,":[182],"it":[183],"shows":[184],"an":[185],"improvement":[186],"10.06%":[188],"for":[189,193],"I\u2192T":[190],"6.12%":[192],"T\u2192I,":[194],"reaching":[195],"best":[197],"overall":[198],"performance.":[199],"Both":[200],"quantitative":[201],"qualitative":[203],"analyses":[204],"further":[205],"confirm":[206],"narrows":[210],"produces":[215],"more":[216],"uniform":[217],"hypersphere.":[222]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
