{"id":"https://openalex.org/W7164836607","doi":"https://doi.org/10.1145/3805622.3810606","title":"Contrastive Multimodal Fusion and Pseudo-Label method for Unsupervised Cross-Modal Hashing Retrieval","display_name":"Contrastive Multimodal Fusion and Pseudo-Label method for Unsupervised Cross-Modal Hashing Retrieval","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164836607","doi":"https://doi.org/10.1145/3805622.3810606"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810606","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810606","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810606","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043590120","display_name":"Qinze Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I126924076","display_name":"Chongqing Normal University","ror":"https://ror.org/01dcw5w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I126924076"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinze Zhu","raw_affiliation_strings":["College of Computer and Information Science, Chongqing Normal University, Chongqing, China"],"raw_orcid":"https://orcid.org/0009-0005-7503-2861","affiliations":[{"raw_affiliation_string":"College of Computer and Information Science, Chongqing Normal University, Chongqing, China","institution_ids":["https://openalex.org/I126924076"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014823194","display_name":"Jiayi Yang","orcid":"https://orcid.org/0000-0002-3135-689X"},"institutions":[{"id":"https://openalex.org/I126924076","display_name":"Chongqing Normal University","ror":"https://ror.org/01dcw5w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I126924076"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayi Yang","raw_affiliation_strings":["College of Computer and Information Science, Chongqing Normal University, Chongqing, China"],"raw_orcid":"https://orcid.org/0009-0007-7629-2432","affiliations":[{"raw_affiliation_string":"College of Computer and Information Science, Chongqing Normal University, Chongqing, China","institution_ids":["https://openalex.org/I126924076"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102525775","display_name":"Yijie Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I126924076","display_name":"Chongqing Normal University","ror":"https://ror.org/01dcw5w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I126924076"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yijie Zhu","raw_affiliation_strings":["College of Computer and Information Science, Chongqing Normal University, Chongqing, China"],"raw_orcid":"https://orcid.org/0009-0000-4524-4496","affiliations":[{"raw_affiliation_string":"College of Computer and Information Science, Chongqing Normal University, Chongqing, China","institution_ids":["https://openalex.org/I126924076"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100686151","display_name":"Mingyong Li","orcid":"https://orcid.org/0000-0002-5517-3633"},"institutions":[{"id":"https://openalex.org/I126924076","display_name":"Chongqing Normal University","ror":"https://ror.org/01dcw5w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I126924076"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyong Li","raw_affiliation_strings":["College of Computer and Information Science, Chongqing Normal University, Chongqing, China"],"raw_orcid":"https://orcid.org/0000-0002-5517-3633","affiliations":[{"raw_affiliation_string":"College of Computer and Information Science, Chongqing Normal University, Chongqing, China","institution_ids":["https://openalex.org/I126924076"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93731439,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"308","last_page":"316"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9466000199317932,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9466000199317932,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.00570000009611249,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.004600000102072954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.6638000011444092},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4683000147342682},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4641999900341034},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.45820000767707825},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.4546000063419342},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.44200000166893005},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.44190001487731934},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.4171000123023987},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4025999903678894}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8151999711990356},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.6638000011444092},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6495000123977661},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4683000147342682},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4641999900341034},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.45820000767707825},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.4546000063419342},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.44200000166893005},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.44190001487731934},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.4171000123023987},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4025999903678894},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4000000059604645},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3919000029563904},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.3732999861240387},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.3732999861240387},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34360000491142273},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32760000228881836},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3271999955177307},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32190001010894775},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.31869998574256897},{"id":"https://openalex.org/C133667856","wikidata":"https://www.wikidata.org/wiki/Q5439682","display_name":"Feature hashing","level":5,"score":0.31189998984336853},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.310699999332428},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.29809999465942383},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28859999775886536},{"id":"https://openalex.org/C74270461","wikidata":"https://www.wikidata.org/wiki/Q1625299","display_name":"Locality-sensitive hashing","level":4,"score":0.28769999742507935},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2712000012397766},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.25780001282691956}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810606","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810606","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810606","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810606","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6965726017951965,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1976258951","https://openalex.org/W2007972815","https://openalex.org/W2049993534","https://openalex.org/W2086958058","https://openalex.org/W2155803963","https://openalex.org/W2266728343","https://openalex.org/W2430055109","https://openalex.org/W2476624367","https://openalex.org/W2565611351","https://openalex.org/W2749145308","https://openalex.org/W2795832645","https://openalex.org/W2808243243","https://openalex.org/W2808301810","https://openalex.org/W2963187862","https://openalex.org/W2982008649","https://openalex.org/W2982905682","https://openalex.org/W3003735286","https://openalex.org/W3008072721","https://openalex.org/W3027918202","https://openalex.org/W3035747010","https://openalex.org/W3120620666","https://openalex.org/W3122091215","https://openalex.org/W3207332079","https://openalex.org/W3207778762","https://openalex.org/W4285242239","https://openalex.org/W4312477797","https://openalex.org/W4321021417","https://openalex.org/W4382449735","https://openalex.org/W4385614537","https://openalex.org/W4402979747"],"related_works":[],"abstract_inverted_index":{"Unsupervised":[0],"cross-modal":[1,68,214],"hashing":[2,69],"retrieval":[3,10,57,70,205,230],"has":[4],"demonstrated":[5],"significant":[6],"advantages":[7],"in":[8,34,227],"heterogeneous":[9],"tasks,":[11],"as":[12],"it":[13],"requires":[14],"no":[15],"manually":[16],"annotated":[17],"data":[18],"and":[19,40,76,112,185,232,238],"exhibits":[20],"strong":[21],"scalability":[22],"across":[23,142],"modalities.":[24,143],"However,":[25],"existing":[26,224],"unsupervised":[27,67],"methods":[28,226],"often":[29],"suffer":[30],"from":[31],"an":[32,82,122],"imbalance":[33],"the":[35,44,110,139,167,173],"representation":[36],"quality":[37],"between":[38,105],"image":[39],"text":[41],"features.":[42,117],"Moreover,":[43],"lack":[45],"of":[46,115,159,177,229],"explicit":[47],"semantic":[48,53,103,140,157,164,189,196],"supervision":[49,165],"frequently":[50],"leads":[51],"to":[52,135,149,179],"degradation,":[54],"which":[55,128],"limits":[56],"accuracy.":[58],"To":[59,162],"address":[60],"these":[61],"challenges,":[62],"we":[63,80,120,171],"propose":[64],"a":[65,90,151,187],"novel":[66],"framework,":[71],"termed":[72],"Contrastive":[73],"Multimodal":[74],"Fusion":[75],"Pseudo-Labeling":[77],"(CMFPL).":[78],"Specifically,":[79],"design":[81,98],"efficient":[83],"contrastive":[84,95],"multimodal":[85,92,124],"learning":[86,169],"module":[87],"that":[88,217],"integrates":[89],"Transformer-based":[91],"encoder":[93],"with":[94,132],"loss.":[96],"This":[97,144,192],"not":[99],"only":[100],"enhances":[101],"deep":[102],"interaction":[104],"modalities":[106],"but":[107],"also":[108],"improves":[109],"expressiveness":[111],"discriminative":[113],"capability":[114,176],"modality-specific":[116],"In":[118],"addition,":[119],"develop":[121],"innovative":[123],"association":[125,190],"matrix":[126,145],"generator,":[127],"combines":[129],"cosine":[130],"similarity":[131],"K-Means":[133],"clustering":[134],"more":[136],"accurately":[137],"model":[138],"correlations":[141],"is":[146],"further":[147],"utilized":[148],"guide":[150],"Graph":[152],"Convolutional":[153],"Network":[154],"(GCN)":[155],"for":[156,182,198],"reconstruction":[158],"hash":[160,168,199],"codes.":[161],"reinforce":[163],"during":[166],"process,":[170],"leverage":[172],"zero-shot":[174],"classification":[175],"CLIP":[178],"generate":[180],"pseudo-labels":[181],"each":[183],"sample":[184],"construct":[186],"high-quality":[188],"graph.":[191],"graph":[193],"provides":[194],"robust":[195],"guidance":[197],"code":[200],"learning,":[201],"thereby":[202],"significantly":[203],"improving":[204],"performance.":[206],"Extensive":[207],"experiments":[208],"conducted":[209],"on":[210],"three":[211],"publicly":[212],"available":[213],"datasets":[215],"demonstrate":[216],"our":[218],"proposed":[219],"CMFPL":[220],"framework":[221],"consistently":[222],"outperforms":[223],"state-of-the-art":[225],"terms":[228],"accuracy":[231],"robustness,":[233],"fully":[234],"validating":[235],"its":[236],"effectiveness":[237],"superiority.":[239]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
