{"id":"https://openalex.org/W7143526407","doi":"https://doi.org/10.48550/arxiv.2603.26341","title":"HINT: Composed Image Retrieval with Dual-path Compositional Contextualized Network","display_name":"HINT: Composed Image Retrieval with Dual-path Compositional Contextualized Network","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7143526407","doi":"https://doi.org/10.48550/arxiv.2603.26341"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26341","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26341","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26341","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130918807","display_name":"Mingyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Mingyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130990537","display_name":"Zixu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zixu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130985419","display_name":"Zhiwei 
Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130974241","display_name":"Zhiheng Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Zhiheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130938917","display_name":"Xiaowei Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Xiaowei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128154946","display_name":"Jiajia Nie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nie, Jiajia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130938320","display_name":"Yinwei Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Yinwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130990259","display_name":"Yupeng Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, 
Yupeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.4108000099658966,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.4108000099658966,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.4043999910354614,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.14000000059604645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern 
Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.7106999754905701},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6341000199317932},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5989000201225281},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5649999976158142},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5598000288009644},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5503000020980835},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5342000126838684},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5045999884605408},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.476500004529953}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8130000233650208},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.7106999754905701},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6341000199317932},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching 
(statistics)","level":2,"score":0.5989000201225281},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5649999976158142},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5598000288009644},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5503000020980835},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5475999712944031},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5342000126838684},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5045999884605408},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4993000030517578},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.476500004529953},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4634999930858612},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.454800009727478},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature 
extraction","level":2,"score":0.3589000105857849},{"id":"https://openalex.org/C93226319","wikidata":"https://www.wikidata.org/wiki/Q193137","display_name":"Differential (mechanical device)","level":2,"score":0.3441999852657318},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.3255999982357025},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.32499998807907104},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29589998722076416},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.2849000096321106},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2728999853134155},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge 
base","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C2986492983","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image matching","level":3,"score":0.2597000002861023},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.25540000200271606}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26341","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26341","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26341","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26341","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7177708745002747,"display_name":"Reduced 
inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Composed":[0],"Image":[1],"Retrieval":[2],"(CIR)":[3],"is":[4,70],"a":[5,29,34,53,87,97],"challenging":[6],"image":[7,18,36],"retrieval":[8],"paradigm.":[9],"It":[10],"aims":[11],"to":[12,76],"retrieve":[13],"target":[14],"images":[15],"from":[16],"large-scale":[17],"databases":[19],"that":[20],"are":[21,151],"consistent":[22],"with":[23],"the":[24,57,84,110,120,144],"modification":[25,38],"semantics,":[26],"based":[27],"on":[28,135],"multimodal":[30],"query":[31],"composed":[32],"of":[33,59,86,123,146],"reference":[35],"and":[37,50,82,108,115],"text.":[39],"Although":[40],"existing":[41],"methods":[42],"have":[43],"made":[44],"significant":[45],"progress":[46],"in":[47,62,126],"cross-modal":[48],"alignment":[49],"feature":[51],"fusion,":[52],"key":[54],"flaw":[55],"remains:":[56],"neglect":[58],"contextual":[60],"information":[61],"discriminating":[63],"matching":[64,114],"samples.":[65],"However,":[66],"addressing":[67],"this":[68],"limitation":[69],"not":[71],"an":[72],"easy":[73],"task":[74],"due":[75],"two":[77,139],"challenges:":[78],"1)":[79],"implicit":[80],"dependencies":[81],"2)":[83],"lack":[85],"differential":[88],"amplification":[89],"mechanism.":[90],"To":[91],"address":[92],"these":[93],"challenges,":[94],"we":[95],"propose":[96],"dual-patH":[98],"composItional":[99],"coNtextualized":[100],"neTwork":[101],"(HINT),":[102],"which":[103],"can":[104],"perform":[105],"contextualized":[106],"encoding":[107],"amplify":[109],"similarity":[111],"differences":[112],"between":[113],"non-matching":[116],"samples,":[117],"thus":[118],"improving":[119],"upper":[121],"performance":[122,134],"CIR":[124,140],"models":[125],"complex":[127],"scenarios.":[128],"Our":[129],"HINT":[130,148],"model":[131],"achieves":[132],"optimal":[133
],"all":[136],"metrics":[137],"across":[138],"benchmark":[141],"datasets,":[142],"demonstrating":[143],"superiority":[145],"our":[147],"model.":[149],"Codes":[150],"available":[152],"at":[153],"https://github.com/zh-mingyu/HINT.":[154]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-31T00:00:00"}
