{"id":"https://openalex.org/W7138127477","doi":"https://doi.org/10.1609/aaai.v40i11.37898","title":"Heterogeneous Uncertainty-Guided Composed Image Retrieval with Fine-Grained Probabilistic Learning","display_name":"Heterogeneous Uncertainty-Guided Composed Image Retrieval with Fine-Grained Probabilistic Learning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138127477","doi":"https://doi.org/10.1609/aaai.v40i11.37898"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i11.37898","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37898","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i11.37898","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058023480","display_name":"Haomiao Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haomiao Tang","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129723335","display_name":"Jinpeng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinpeng Wang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129738274","display_name":"Minyi Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Zhao","raw_affiliation_strings":["Fudan University"],"affiliations":[{"raw_affiliation_string":"Fudan University","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102398518","display_name":"Guanghao Meng","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"GuangHao Meng","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101366944","display_name":"Ruisheng Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruisheng Luo","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129713475","display_name":"Long Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Long Chen","raw_affiliation_strings":["The Hong Kong University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129641620","display_name":"Shu-Tao Xia","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shu-Tao Xia","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5058023480"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.4380597,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"11","first_page":"9386","last_page":"9394"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.4740000069141388,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.4740000069141388,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.3797000050544739,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.08659999817609787,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.7414000034332275},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.66839998960495},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.6018999814987183},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.5602999925613403},{"id":"https://openalex.org/keywords/divergence-from-randomness-model","display_name":"Divergence-from-randomness model","score":0.5013999938964844},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.49230000376701355},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4489000141620636},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.3961000144481659},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.3903000056743622}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7515000104904175},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.7414000034332275},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.66839998960495},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.6018999814987183},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5831999778747559},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.5602999925613403},{"id":"https://openalex.org/C149189445","wikidata":"https://www.wikidata.org/wiki/Q5283894","display_name":"Divergence-from-randomness model","level":3,"score":0.5013999938964844},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.49230000376701355},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4674000144004822},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4489000141620636},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4090999960899353},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3961000144481659},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3903000056743622},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.38019999861717224},{"id":"https://openalex.org/C66882249","wikidata":"https://www.wikidata.org/wiki/Q169336","display_name":"Homogeneous","level":2,"score":0.3797999918460846},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.37599998712539673},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.367000013589859},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.33820000290870667},{"id":"https://openalex.org/C2779532271","wikidata":"https://www.wikidata.org/wiki/Q445558","display_name":"Relevance feedback","level":4,"score":0.3084999918937683},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.27300000190734863},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.26649999618530273},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.26589998602867126},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.26499998569488525},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.2621999979019165}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i11.37898","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37898","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i11.37898","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37898","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.738532543182373,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Composed":[0],"Image":[1],"Retrieval":[2],"(CIR)":[3],"enables":[4],"image":[5,11],"search":[6],"by":[7,81,117],"combining":[8],"a":[9,59,70,101,118],"reference":[10],"with":[12,141,160],"modification":[13],"text.":[14],"Intrinsic":[15],"noise":[16],"in":[17,33],"CIR":[18,42],"triplets":[19],"incurs":[20],"intrinsic":[21],"uncertainty":[22,92],"and":[23,49,54,77,87,97,138],"threatens":[24],"model's":[25],"robustness.":[26],"Probabilistic":[27],"learning":[28,73],"approaches":[29],"have":[30],"shown":[31],"promise":[32],"addressing":[34],"such":[35],"issues;":[36],"however,":[37],"they":[38],"fall":[39],"short":[40],"for":[41,52,94],"due":[43],"to":[44,64,123],"their":[45],"instance-level":[46],"holistic":[47,136],"modeling":[48],"homogeneous":[50],"treatments":[51],"queries":[53,76,96],"targets.":[55,99],"This":[56],"paper":[57],"introduces":[58],"Heterogeneous":[60],"Uncertainty-Guided":[61],"(HUG)":[62],"paradigm":[63],"overcome":[65],"these":[66],"limitations.":[67],"HUG":[68],"utilizes":[69],"fine-grained":[71,139],"probabilistic":[72],"framework,":[74],"where":[75],"targets":[78],"are":[79],"represented":[80],"Gaussian":[82],"embeddings":[83],"capturing":[84],"detailed":[85],"concepts":[86],"uncertainties.":[88],"We":[89,129],"customize":[90],"heterogeneous":[91],"estimations":[93],"multi-modal":[95,114],"uni-modal":[98,109],"Given":[100],"query,":[102],"we":[103],"capture":[104],"uncertainties":[105],"not":[106],"only":[107],"regarding":[108],"content":[110],"quality":[111],"but":[112],"also":[113],"coordination,":[115],"followed":[116],"provable":[119],"dynamic":[120],"weighting":[121],"mechanism":[122],"derive":[124],"the":[125,164],"comprehensive":[126,142],"query":[127],"uncertainty.":[128],"further":[130],"design":[131],"uncertainty-guided":[132],"objectives,":[133],"including":[134],"query-target":[135],"contrast":[137],"contrasts":[140],"negative":[143],"sampling":[144],"strategies,":[145],"which":[146],"effectively":[147],"enhance":[148],"discriminative":[149],"learning.":[150],"Experiments":[151],"on":[152],"benchmarks":[153],"demonstrate":[154],"HUG's":[155],"effectiveness":[156],"beyond":[157],"state-of-the-art":[158],"baselines,":[159],"faithful":[161],"analysis":[162],"justifying":[163],"technical":[165],"contributions.":[166]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
