{"id":"https://openalex.org/W7155204878","doi":"https://doi.org/10.48550/arxiv.2604.19386","title":"Air-Know: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval","display_name":"Air-Know: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7155204878","doi":"https://doi.org/10.48550/arxiv.2604.19386"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.19386","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19386","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.19386","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134349779","display_name":"Zhiheng Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Zhiheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134323984","display_name":"Yupeng Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Yupeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130388821","display_name":"Qianyun Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Qianyun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134297023","display_name":"Shiqi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shiqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134342971","display_name":"Zhiwei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134334431","display_name":"Zixu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zixu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.758400022983551,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.758400022983551,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.1356000006198883,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.040699999779462814,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.49459999799728394},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.4814000129699707},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.45719999074935913},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.38190001249313354},{"id":"https://openalex.org/keywords/relevance-feedback","display_name":"Relevance feedback","score":0.36890000104904175},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.3677999973297119},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.36500000953674316},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.35010001063346863},{"id":"https://openalex.org/keywords/semantic-matching","display_name":"Semantic matching","score":0.3490000069141388}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7817999720573425},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5674999952316284},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.49459999799728394},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.4814000129699707},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.45719999074935913},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4332999885082245},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.38190001249313354},{"id":"https://openalex.org/C2779532271","wikidata":"https://www.wikidata.org/wiki/Q445558","display_name":"Relevance feedback","level":4,"score":0.36890000104904175},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3677999973297119},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36570000648498535},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.3490000069141388},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.34540000557899475},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.33090001344680786},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.31790000200271606},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.3109000027179718},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.30799999833106995},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.28769999742507935},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.27810001373291016},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2720000147819519},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.25200000405311584},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.19386","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19386","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.19386","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19386","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5652146935462952,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Composed":[0],"Image":[1],"Retrieval":[2],"(CIR)":[3],"has":[4],"attracted":[5],"significant":[6],"attention":[7],"due":[8],"to":[9,54,122,141,158],"its":[10,16],"flexible":[11],"multimodal":[12],"query":[13],"method,":[14],"yet":[15],"development":[17],"is":[18,71],"severely":[19],"constrained":[20],"by":[21],"the":[22,35,40,60,69,74,143,154,160,190],"Noisy":[23],"Triplet":[24],"Correspondence":[25],"(NTC)":[26],"problem.":[27],"Most":[28],"existing":[29,186],"robust":[30],"learning":[31],"methods":[32,188],"rely":[33],"on":[34,176],"\"small":[36],"loss":[37],"hypothesis\",":[38],"but":[39],"unique":[41],"semantic":[42],"ambiguity":[43],"in":[44,62,198],"NTC,":[45],"such":[46],"as":[47,118],"\"partial":[48],"matching\",":[49],"invalidates":[50],"this":[51,83],"assumption,":[52],"leading":[53],"unreliable":[55],"noise":[56],"identification.":[57],"This":[58],"entraps":[59],"model":[61],"a":[63,88,124,137,164,169],"self":[64],"dependent":[65],"vicious":[66],"cycle":[67],"where":[68],"learner":[70],"intertwined":[72],"with":[73],"arbiter,":[75],"ultimately":[76],"causing":[77],"catastrophic":[78],"\"representation":[79],"pollution\".":[80],"To":[81],"address":[82],"critical":[84],"challenge,":[85],"we":[86],"propose":[87],"novel":[89],"\"Expert-Proxy-Diversion\"":[90],"decoupling":[91],"paradigm,":[92],"named":[93],"Air-Know":[94,101,183],"(ArbIteR":[95],"calibrated":[96],"Knowledge":[97,131],"iNternalizing":[98],"rObust":[99],"netWork).":[100],"incorporates":[102],"three":[103],"core":[104],"modules:":[105],"(1)":[106],"External":[107],"Prior":[108],"Arbitration":[109],"(EPA),":[110],"which":[111,134,152],"utilizes":[112],"Multimodal":[113],"Large":[114],"Language":[115],"Models":[116],"(MLLMs)":[117],"an":[119],"offline":[120],"expert":[121],"construct":[123],"high":[125],"precision":[126],"anchor":[127],"dataset;":[128],"(2)":[129],"Expert":[130],"Internalization":[132],"(EKI),":[133],"efficiently":[135],"guides":[136],"lightweight":[138],"proxy":[139],"\"arbiter\"":[140],"internalize":[142],"expert's":[144],"discriminative":[145],"logic;":[146],"(3)":[147],"Dual":[148],"Stream":[149],"Reconciliation":[150],"(DSR),":[151],"leverages":[153],"EKI's":[155],"matching":[156],"confidence":[157],"divert":[159],"training":[161],"data,":[162],"achieving":[163],"clean":[165],"alignment":[166],"stream":[167],"and":[168],"representation":[170],"feedback":[171],"reconciliation":[172],"stream.":[173],"Extensive":[174],"experiments":[175],"multiple":[177],"CIR":[178],"benchmark":[179],"datasets":[180],"demonstrate":[181],"that":[182],"significantly":[184],"outperforms":[185],"SOTA":[187],"under":[189],"NTC":[191],"setting,":[192],"while":[193],"also":[194],"showing":[195],"strong":[196],"competitiveness":[197],"traditional":[199],"CIR.":[200]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-23T00:00:00"}
