{"id":"https://openalex.org/W7155103078","doi":"https://doi.org/10.48550/arxiv.2604.18051","title":"INTENT: Invariance and Discrimination-aware Noise Mitigation for Robust Composed Image Retrieval","display_name":"INTENT: Invariance and Discrimination-aware Noise Mitigation for Robust Composed Image Retrieval","publication_year":2026,"publication_date":"2026-04-20","ids":{"openalex":"https://openalex.org/W7155103078","doi":"https://doi.org/10.48550/arxiv.2604.18051"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.18051","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18051","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.18051","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134196302","display_name":"Zhiwei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134181244","display_name":"Yupeng Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Yupeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134201651","display_name":"Zhiheng Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Zhiheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134189417","display_name":"Zixu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zixu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129731238","display_name":"Jiale Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Jiale","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126005311","display_name":"Qinlei Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Qinlei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134134786","display_name":"Yinwei Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Yinwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.504800021648407,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.504800021648407,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.3977000117301941,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0406000018119812,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6478999853134155},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6471999883651733},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5626999735832214},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.4968000054359436},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4812999963760376},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.44830000400543213},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.40619999170303345},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.40529999136924744}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6912000179290771},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6478999853134155},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6471999883651733},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6248999834060669},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5626999735832214},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.4968000054359436},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4812999963760376},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.44830000400543213},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.40619999170303345},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.40529999136924744},{"id":"https://openalex.org/C35772409","wikidata":"https://www.wikidata.org/wiki/Q1323086","display_name":"Image noise","level":3,"score":0.39480000734329224},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.33320000767707825},{"id":"https://openalex.org/C2780186347","wikidata":"https://www.wikidata.org/wiki/Q11414","display_name":"Subnetwork","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3147999942302704},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.3142000138759613},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.29989999532699585},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.2937000095844269},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26030001044273376}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.18051","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18051","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.18051","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18051","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7248214483261108,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Composed":[0],"Image":[1],"Retrieval":[2],"(CIR)":[3],"is":[4,126],"a":[5,212],"challenging":[6],"image":[7],"retrieval":[8],"paradigm":[9],"that":[10,39,83,216],"enables":[11],"to":[12,50,118,162,181,193],"retrieve":[13],"target":[14],"images":[15,23],"based":[16,220],"on":[17,131,172,221,231],"multimodal":[18],"queries":[19],"consisting":[20],"of":[21,72,242],"reference":[22],"and":[24,96,129,145,156,189,207,210,240],"modification":[25,121],"texts.":[26],"Although":[27],"substantial":[28],"progress":[29],"has":[30,77],"been":[31],"made":[32],"in":[33,46,62,85],"recent":[34],"years,":[35],"existing":[36],"methods":[37],"assume":[38],"all":[40],"samples":[41],"are":[42],"correctly":[43],"matched.":[44],"However,":[45,123],"real-world":[47],"scenarios,":[48],"due":[49],"high":[51],"triplet":[52],"annotation":[53,59],"costs,":[54],"CIR":[55,86],"datasets":[56,236],"inevitably":[57],"contain":[58],"errors,":[60],"resulting":[61],"incorrectly":[63],"matched":[64],"triplets.":[65],"To":[66,137],"address":[67],"this":[68],"issue,":[69],"the":[70,107,119,143,164,173,191,222,238],"problem":[71],"Noisy":[73],"Triplet":[74],"Correspondence":[75],"(NTC)":[76],"attracted":[78],"growing":[79],"attention.":[80],"We":[81],"argue":[82],"noise":[84,95,125,134,196],"can":[87],"be":[88],"categorized":[89],"into":[90],"two":[91,151,232],"types:":[92],"cross-modal":[93,132],"correspondence":[94,133,227],"modality-inherent":[97,124,195],"noise.":[98,166],"The":[99,167,199],"former":[100,168],"arises":[101],"from":[102,110],"mismatches":[103],"across":[104],"modalities,":[105],"whereas":[106],"latter":[108,200],"originates":[109],"intra-modal":[111],"background":[112],"interference":[113],"or":[114],"visual":[115,174,187],"factors":[116],"irrelevant":[117],"coarse-grained":[120],"annotations.":[122],"often":[127],"overlooked,":[128],"research":[130],"remains":[135],"nascent.":[136],"tackle":[138],"above":[139],"issues,":[140],"we":[141],"propose":[142],"Invariance":[144],"discrimiNaTion-awarE":[146],"Noise":[147],"neTwork":[148],"(INTENT),":[149],"comprising":[150],"components:":[152],"Visual":[153],"Invariant":[154],"Composition":[155],"Bi-Objective":[157],"Discriminative":[158],"Learning,":[159],"specifically":[160],"designed":[161],"handle":[163],"two-aspect":[165],"applies":[169],"causal":[170],"intervention":[171],"side":[175],"via":[176],"Fast":[177],"Fourier":[178],"Transform":[179],"(FFT)":[180],"generate":[182],"intervened":[183],"composed":[184],"features,":[185],"enforcing":[186],"invariance":[188],"enabling":[190,225],"model":[192],"ignore":[194],"during":[197],"composition.":[198],"adopts":[201],"collaborative":[202],"optimization":[203],"with":[204],"both":[205],"positive":[206],"negative":[208],"samples,":[209],"constructs":[211],"scalable":[213],"decision":[214],"boundary":[215],"dynamically":[217],"adjusts":[218],"decisions":[219],"loyalty":[223],"degree,":[224],"robust":[226],"discrimination.":[228],"Extensive":[229],"experiments":[230],"widely":[233],"used":[234],"benchmark":[235],"demonstrate":[237],"superiority":[239],"robustness":[241],"INTENT.":[243]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-22T00:00:00"}
