{"id":"https://openalex.org/W7164842872","doi":"https://doi.org/10.1145/3805622.3810829","title":"Audit-and-Repair Indexing: Probabilistic Caption Indexes for Robust Multimodal Retrieval","display_name":"Audit-and-Repair Indexing: Probabilistic Caption Indexes for Robust Multimodal Retrieval","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164842872","doi":"https://doi.org/10.1145/3805622.3810829"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810829","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810829","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810829","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5138634692","display_name":"Yangyang Liu","orcid":"https://orcid.org/0009-0009-2372-0880"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yangyang Liu","raw_affiliation_strings":["Independent Researcher, Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-2372-0880","affiliations":[{"raw_affiliation_string":"Independent Researcher, Beijing, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5138634692"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93808992,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1653","last_page":"1661"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9362000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9362000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.017999999225139618,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.015599999576807022,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.77920001745224},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6022999882698059},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.5390999913215637},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5322999954223633},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.5097000002861023},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4390999972820282},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.42590001225471497},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4223000109195709}],"concepts":[{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.77920001745224},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7681000232696533},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6022999882698059},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.5390999913215637},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5322999954223633},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.5097000002861023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46799999475479126},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4390999972820282},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4271000027656555},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.42590001225471497},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4223000109195709},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4174000024795532},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4023999869823456},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3889000117778778},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.36070001125335693},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34860000014305115},{"id":"https://openalex.org/C130590232","wikidata":"https://www.wikidata.org/wiki/Q1671754","display_name":"Inverted index","level":3,"score":0.32600000500679016},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3140000104904175},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.31310001015663147},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.25870001316070557}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810829","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810829","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810829","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810829","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.4769284129142761,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1647779468","https://openalex.org/W1861492603","https://openalex.org/W2166362129","https://openalex.org/W2167460663","https://openalex.org/W2185175083","https://openalex.org/W2332488709","https://openalex.org/W2425121537","https://openalex.org/W2561715562","https://openalex.org/W2745461083","https://openalex.org/W2769041395","https://openalex.org/W2962735233","https://openalex.org/W2963469388","https://openalex.org/W2963518342","https://openalex.org/W2998702515","https://openalex.org/W3035507081","https://openalex.org/W3105604018","https://openalex.org/W3172514680","https://openalex.org/W3211402614","https://openalex.org/W4210880854","https://openalex.org/W4256046779","https://openalex.org/W4304014690","https://openalex.org/W4312825288","https://openalex.org/W4386075815","https://openalex.org/W4390873312","https://openalex.org/W4402727764","https://openalex.org/W4402754015","https://openalex.org/W4402980118","https://openalex.org/W7133193597","https://openalex.org/W7133220561"],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"retrieval":[1],"systems":[2],"increasingly":[3],"build":[4],"indexes":[5,53],"from":[6,87],"captions":[7,16,64],"generated":[8],"by":[9],"large":[10],"multimodal":[11],"models.":[12],"In":[13],"practice,":[14],"these":[15],"may":[17],"contain":[18],"wrong":[19],"attributes":[20],"or":[21],"missing":[22],"details.":[23],"Once":[24],"stored":[25],"in":[26],"the":[27,93,128,135,170],"index,":[28],"such":[29],"errors":[30],"can":[31],"repeatedly":[32],"harm":[33],"future":[34],"queries,":[35],"especially":[36],"attribute":[37,100],"and":[38,56,65,102,114,141,147,158,167],"compositional":[39],"queries.":[40],"We":[41,90],"propose":[42],"Audit-and-Repair":[43,153],"Indexing":[44,154],"(ARI),":[45],"a":[46,67,96],"simple":[47],"closed-loop":[48],"framework":[49],"that":[50,126,152],"makes":[51],"caption-based":[52],"both":[54],"uncertainty-aware":[55,81],"fixable.":[57],"For":[58],"each":[59],"item,":[60],"we":[61],"sample":[62],"multiple":[63],"form":[66],"probabilistic":[68],"caption":[69,162,171],"index":[70,94,129],"(a":[71],"mean":[72],"embedding":[73],"with":[74,164],"an":[75,80,103,122],"uncertainty":[76],"estimate).":[77],"Retrieval":[78],"uses":[79],"score":[82],"to":[83,106,134],"reduce":[84],"over-confident":[85],"matches":[86,113],"unstable":[88],"captions.":[89],"then":[91],"audit":[92],"using":[95],"set":[97],"of":[98,110],"falsifiable":[99],"probes":[101],"evidence":[104],"verifier":[105],"mine":[107],"two":[108],"types":[109],"contradictions:":[111],"false-positive":[112],"false-negative":[115],"misses.":[116],"These":[117],"contradictions":[118],"provide":[119],"supervision":[120],"for":[121],"optimization-based":[123],"repair":[124],"objective":[125],"updates":[127],"parameters":[130],"while":[131],"staying":[132],"close":[133],"original":[136],"semantics.":[137],"Experiments":[138],"on":[139],"image-text":[140],"video-text":[142],"retrieval,":[143],"including":[144],"controlled":[145],"noise":[146],"bias":[148],"stress":[149],"tests,":[150],"show":[151],"improves":[155],"top-K":[156],"accuracy":[157],"ranking":[159],"stability":[160],"under":[161],"errors,":[163],"minimal":[165],"overhead":[166],"without":[168],"retraining":[169],"generator.":[172]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
