{"id":"https://openalex.org/W4416386050","doi":"https://doi.org/10.1145/3777477","title":"Noise-Robust Generative Hashing for Cross-Modal Retrieval","display_name":"Noise-Robust Generative Hashing for Cross-Modal Retrieval","publication_year":2025,"publication_date":"2025-11-19","ids":{"openalex":"https://openalex.org/W4416386050","doi":"https://doi.org/10.1145/3777477"},"language":"en","primary_location":{"id":"doi:10.1145/3777477","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3777477","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zequn Wang","orcid":"https://orcid.org/0009-0005-0037-3098"},"institutions":[{"id":"https://openalex.org/I28006308","display_name":"Shandong Normal University","ror":"https://ror.org/01wy3h363","country_code":"CN","type":"education","lineage":["https://openalex.org/I28006308"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zequn Wang","raw_affiliation_strings":["Shandong Normal University, Jinan, China and Tongji University, Shanghai, China","Shandong Normal University, China and Tongji University, China"],"raw_orcid":"https://orcid.org/0009-0005-0037-3098","affiliations":[{"raw_affiliation_string":"Shandong Normal University, Jinan, China and Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I28006308"]},{"raw_affiliation_string":"Shandong Normal University, China and Tongji University, China","institution_ids":["https://openalex.org/I28006308"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014809128","display_name":"Tianshi Wang","orcid":"https://orcid.org/0000-0002-8013-5188"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianshi Wang","raw_affiliation_strings":["Tongji University, Shanghai, China","Tongji University, China"],"raw_orcid":"https://orcid.org/0000-0002-8013-5188","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Tongji University, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101901063","display_name":"Fengling Li","orcid":"https://orcid.org/0000-0002-3432-6215"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Fengling Li","raw_affiliation_strings":["University of Technology Sydney, Sydney, Australia","Australian Artificial Intelligence Institute, University of Technology Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0002-3432-6215","affiliations":[{"raw_affiliation_string":"University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"Australian Artificial Intelligence Institute, University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338386","display_name":"Jingjing Li","orcid":"https://orcid.org/0000-0002-5504-2529"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingjing Li","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China","University of Electronic Science and Technology of China, China"],"raw_orcid":"https://orcid.org/0000-0002-5504-2529","affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"University of Electronic Science and Technology of China, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108048954","display_name":"Lei Zhu","orcid":"https://orcid.org/0000-0002-2993-7142"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Zhu","raw_affiliation_strings":["Tongji University, Shanghai, China","Tongji University, China"],"raw_orcid":"https://orcid.org/0000-0002-2993-7142","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Tongji University, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I28006308"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34131985,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":"4","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.873199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.873199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.03869999945163727,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.033799998462200165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6115000247955322},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.6065000295639038},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5938000082969666},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5782999992370605},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5555999875068665},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5013999938964844},{"id":"https://openalex.org/keywords/feature-hashing","display_name":"Feature hashing","score":0.47380000352859497},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44269999861717224},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4228000044822693}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.88919997215271},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6115000247955322},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.6065000295639038},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5938000082969666},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5782999992370605},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5593000054359436},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5555999875068665},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5013999938964844},{"id":"https://openalex.org/C133667856","wikidata":"https://www.wikidata.org/wiki/Q5439682","display_name":"Feature hashing","level":5,"score":0.47380000352859497},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44269999861717224},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4228000044822693},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4090999960899353},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3935999870300293},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.3434999883174896},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3208000063896179},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.31790000200271606},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C122907437","wikidata":"https://www.wikidata.org/wiki/Q5318999","display_name":"Dynamic perfect hashing","level":5,"score":0.3124000132083893},{"id":"https://openalex.org/C4199805","wikidata":"https://www.wikidata.org/wiki/Q2725903","display_name":"Gaussian noise","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26930001378059387},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C168781493","wikidata":"https://www.wikidata.org/wiki/Q80585","display_name":"Associative array","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.25369998812675476},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3777477","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3777477","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1489116628","https://openalex.org/W1910300841","https://openalex.org/W2007972815","https://openalex.org/W2059471177","https://openalex.org/W2155803963","https://openalex.org/W2266728343","https://openalex.org/W2739107216","https://openalex.org/W2765440071","https://openalex.org/W2781821509","https://openalex.org/W2802864907","https://openalex.org/W2966632286","https://openalex.org/W3042609801","https://openalex.org/W3072820880","https://openalex.org/W3175740157","https://openalex.org/W4285242239","https://openalex.org/W4285288078","https://openalex.org/W4288966168","https://openalex.org/W4312477797","https://openalex.org/W4321488152","https://openalex.org/W4323022360","https://openalex.org/W4323663038","https://openalex.org/W4380303571","https://openalex.org/W4383503840","https://openalex.org/W4385825476","https://openalex.org/W4390659080","https://openalex.org/W4393170802","https://openalex.org/W4395029038","https://openalex.org/W4395056490","https://openalex.org/W4398226150","https://openalex.org/W4409263215","https://openalex.org/W4411472285","https://openalex.org/W4412403817"],"related_works":[],"abstract_inverted_index":{"Deep":[0],"hashing":[1],"has":[2],"proven":[3],"remarkable":[4],"effectiveness":[5],"for":[6,46,113],"large-scale":[7],"cross-modal":[8,22,98],"retrieval,":[9],"yet":[10],"its":[11,189],"performance":[12,74],"is":[13,30],"highly":[14],"vulnerable":[15],"to":[16,90,154],"supervisory":[17],"noise,":[18],"such":[19],"as":[20],"mismatched":[21],"correspondences":[23],"and":[24,38,63,143,214,219],"incorrect":[25],"category":[26],"labels.":[27,172],"Such":[28],"noise":[29,60,95,105],"prevalent":[31],"in":[32,61,72,97,207],"real-world":[33],"scenarios,":[34,210],"where":[35],"correspondence":[36],"mismatches":[37],"label":[39],"inaccuracies":[40],"often":[41],"coexist,":[42],"posing":[43],"significant":[44],"challenges":[45],"learning":[47],"accurate":[48],"multimodal":[49,115],"representations.":[50],"Existing":[51],"methods":[52],"typically":[53],"address":[54,77],"only":[55],"a":[56,86,103,124,150,174],"single":[57],"type":[58],"of":[59,68,94,184],"isolation":[62],"neglect":[64],"the":[65,110,164,181],"potential":[66],"value":[67],"noisy":[69,160,167,209],"data,":[70],"resulting":[71],"limited":[73],"gains.":[75],"To":[76],"these":[78,130],"challenges,":[79],"we":[80],"propose":[81],"Noise-Robust":[82],"Generative":[83],"Hashing":[84],"(NRGH),":[85],"unified":[87],"framework":[88],"designed":[89],"accommodate":[91],"various":[92,208],"forms":[93],"inherent":[96],"retrieval.":[99],"Specifically,":[100],"NRGH":[101,133,202],"introduces":[102],"hash-driven":[104],"estimation":[106],"module":[107],"that":[108,158,201],"computes":[109],"confidence":[111,131],"score":[112],"each":[114,185],"sample":[116,186],"by":[117,129],"combining":[118],"frozen":[119],"auxiliary":[120],"hash":[121],"functions":[122],"with":[123],"Gaussian":[125],"mixture":[126],"model.":[127],"Guided":[128],"scores,":[132],"performs":[134],"data":[135,182],"correction":[136],"through":[137],"two":[138],"stages:":[139],"generative":[140],"text":[141],"refinement":[142],"multi-label":[144],"probability":[145],"calibration.":[146],"The":[147],"former":[148],"leverages":[149],"pre-trained":[151],"vision-language":[152],"model":[153],"generate":[155],"descriptive":[156],"captions":[157],"refine":[159],"textual":[161],"information,":[162],"while":[163],"latter":[165],"corrects":[166],"labels":[168],"using":[169],"confidence-aware":[170],"soft":[171],"Furthermore,":[173],"dynamic":[175],"margin":[176],"contrastive":[177],"loss":[178],"adaptively":[179],"modulates":[180],"contribution":[183],"based":[187],"on":[188,197],"confidence,":[190],"enabling":[191],"sample-level":[192],"adaptive":[193],"learning.":[194],"Extensive":[195],"experiments":[196],"benchmark":[198],"datasets":[199,220],"demonstrate":[200],"significantly":[203],"exceeds":[204],"state-of-the-art":[205],"baselines":[206],"delivering":[211],"superior":[212],"robustness":[213],"accuracy.":[215],"Our":[216],"source":[217],"codes":[218],"are":[221],"available":[222],"at":[223],"https://github.com/xiaolaohuuu/NRGH":[224],".":[225]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-11-19T00:00:00"}
