{"id":"https://openalex.org/W4396821179","doi":"https://doi.org/10.1145/3652583.3658032","title":"Enhancing Interactive Image Retrieval With Query Rewriting Using Large Language Models and Vision Language Models","display_name":"Enhancing Interactive Image Retrieval With Query Rewriting Using Large Language Models and Vision Language Models","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4396821179","doi":"https://doi.org/10.1145/3652583.3658032"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3658032","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658032","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658032","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658032","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044091982","display_name":"Hongyi Zhu","orcid":"https://orcid.org/0009-0006-0298-0905"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Hongyi Zhu","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"raw_orcid":"https://orcid.org/0009-0006-0298-0905","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091066062","display_name":"Jia-Hong Huang","orcid":"https://orcid.org/0000-0001-7943-2591"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Jia-Hong Huang","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"raw_orcid":"https://orcid.org/0000-0001-7943-2591","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075331928","display_name":"Stevan Rudinac","orcid":"https://orcid.org/0000-0003-1904-8736"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Stevan Rudinac","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"raw_orcid":"https://orcid.org/0000-0003-1904-8736","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055639036","display_name":"Evangelos Kanoulas","orcid":"https://orcid.org/0000-0002-8312-0694"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Evangelos Kanoulas","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-8312-0694","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044091982"],"corresponding_institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":3.067,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.92544561,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"978","last_page":"987"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8842384815216064},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6949200630187988},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.6908383965492249},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.6653383374214172},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5677220225334167},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.520592212677002},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5195612907409668},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.5028581023216248},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.5011003017425537},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4938119351863861},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.43705397844314575},{"id":"https://openalex.org/keywords/relevance-feedback","display_name":"Relevance feedback","score":0.412461519241333},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.41237252950668335},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4072979688644409},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3577662408351898}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8842384815216064},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6949200630187988},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.6908383965492249},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.6653383374214172},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5677220225334167},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.520592212677002},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5195612907409668},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.5028581023216248},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.5011003017425537},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4938119351863861},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.43705397844314575},{"id":"https://openalex.org/C2779532271","wikidata":"https://www.wikidata.org/wiki/Q445558","display_name":"Relevance feedback","level":4,"score":0.412461519241333},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.41237252950668335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4072979688644409},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3577662408351898},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3652583.3658032","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658032","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658032","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2404.18746","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.18746","pdf_url":"https://arxiv.org/pdf/2404.18746","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:dare.uva.nl:publications/88d438d8-9c7c-426a-ae4f-57fd76b3e463","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/enhancing-interactive-image-retrieval-with-query-rewriting-using-large-language-models-and-vision-language-models(88d438d8-9c7c-426a-ae4f-57fd76b3e463).html","pdf_url":"https://pure.uva.nl/ws/files/232503873/3652583.3658032.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zhu, H, Huang, J-H, Rudinac, S & Kanoulas, E 2024, Enhancing Interactive Image Retrieval With Query Rewriting Using Large Language Models and Vision Language Models. in Proceedings of the 14th Annual ACM International Conference on Multimedia Retrieval (ICMR'24) : Phuket, Thailand, June 10-14, 2024. Association for Computing Machinery, New York, NY, pp. 978-987, 2024 International Conference on Multimedia Retrieval, ICMR 2024, Phuket, Thailand, 10/06/24. https://doi.org/10.1145/3652583.3658032","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1145/3652583.3658032","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658032","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658032","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7799999713897705,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4396821179.pdf"},"referenced_works_count":45,"referenced_works":["https://openalex.org/W54257720","https://openalex.org/W219040644","https://openalex.org/W1654865708","https://openalex.org/W1690919088","https://openalex.org/W1946093182","https://openalex.org/W1973900256","https://openalex.org/W1990821053","https://openalex.org/W1992113527","https://openalex.org/W1999970330","https://openalex.org/W2006604430","https://openalex.org/W2033365921","https://openalex.org/W2046434597","https://openalex.org/W2059844599","https://openalex.org/W2060216474","https://openalex.org/W2065096648","https://openalex.org/W2096733369","https://openalex.org/W2097241500","https://openalex.org/W2100438118","https://openalex.org/W2103163130","https://openalex.org/W2105157020","https://openalex.org/W2105981469","https://openalex.org/W2130660124","https://openalex.org/W2131938770","https://openalex.org/W2155906060","https://openalex.org/W2294130536","https://openalex.org/W2568262903","https://openalex.org/W2760390332","https://openalex.org/W2766630207","https://openalex.org/W2908469318","https://openalex.org/W2963220254","https://openalex.org/W2963588253","https://openalex.org/W2964157791","https://openalex.org/W2973459786","https://openalex.org/W3015549258","https://openalex.org/W3033159379","https://openalex.org/W3155067862","https://openalex.org/W3166125679","https://openalex.org/W3166304536","https://openalex.org/W4212930167","https://openalex.org/W4285531802","https://openalex.org/W4286696412","https://openalex.org/W4293248017","https://openalex.org/W4313181088","https://openalex.org/W4379806382","https://openalex.org/W4385571112"],"related_works":["https://openalex.org/W4234076403","https://openalex.org/W1481401966","https://openalex.org/W2136177730","https://openalex.org/W2576473474","https://openalex.org/W2572349046","https://openalex.org/W2382153208","https://openalex.org/W1160915619","https://openalex.org/W2027155619","https://openalex.org/W2577784223","https://openalex.org/W2230616111"],"abstract_inverted_index":{"Image":[0],"search":[1,20,26],"stands":[2],"as":[3,64],"a":[4,96,102,128,155,197,226],"pivotal":[5],"task":[6],"in":[7,95,118,142,201],"multimedia":[8],"and":[9,54,67,231],"computer":[10],"vision,":[11],"finding":[12],"applications":[13],"across":[14],"diverse":[15],"domains,":[16],"ranging":[17],"from":[18,40],"internet":[19],"to":[21,110,135,165],"medical":[22],"diagnostics.":[23],"Conventional":[24],"image":[25,83,108,143,167,214],"systems":[27],"operate":[28],"by":[29,146,158],"accepting":[30],"textual":[31],"or":[32],"visual":[33],"queries,":[34,116],"retrieving":[35],"the":[36,41,61,68,112,160,166,184,208,217,223],"top-relevant":[37],"candidate":[38],"results":[39],"database.":[42],"However,":[43],"prevalent":[44],"methods":[45,58],"often":[46],"rely":[47],"on":[48,91],"single-turn":[49],"procedures,":[50],"introducing":[51],"potential":[52],"inaccuracies":[53,141],"limited":[55],"recall.":[56,204],"These":[57],"also":[59],"face":[60],"challenges,":[62],"such":[63],"vocabulary":[65],"mismatch":[66],"semantic":[69],"gap,":[70],"constraining":[71],"their":[72],"overall":[73],"effectiveness.":[74],"To":[75,149],"address":[76],"these":[77],"issues,":[78],"we":[79,126,153,182],"propose":[80],"an":[81,211,220],"interactive":[82,213],"retrieval":[84,163,168,215],"system":[85,100,189],"capable":[86],"of":[87,114,186,203,210,219,225],"refining":[88],"queries":[89,121],"based":[90,107,133],"user":[92],"relevance":[93],"feedback":[94],"multi-turn":[97],"setting.":[98],"This":[99],"incorporates":[101],"vision":[103],"language":[104,130],"model":[105,131],"(VLM)":[106],"captioner":[109],"enhance":[111],"quality":[113],"text-based":[115,137],"resulting":[117],"more":[119],"informative":[120],"with":[122,196],"each":[123,177],"iteration.":[124],"Moreover,":[125],"introduce":[127],"large":[129],"(LLM)":[132],"denoiser":[134],"refine":[136],"query":[138],"expansions,":[139],"mitigating":[140],"descriptions":[144],"generated":[145],"captioning":[147],"models.":[148],"evaluate":[150],"our":[151,187],"system,":[152,216],"curate":[154],"new":[156],"dataset":[157,164],"adapting":[159],"MSR-VTT":[161],"video":[162],"task,":[169],"offering":[170],"multiple":[171],"relevant":[172],"ground":[173],"truth":[174],"images":[175],"for":[176],"query.":[178],"Through":[179],"comprehensive":[180],"experiments,":[181],"validate":[183],"effectiveness":[185],"proposed":[188],"against":[190],"baseline":[191],"methods,":[192],"achieving":[193],"state-of-the-art":[194],"performance":[195],"notable":[198],"10%":[199],"improvement":[200],"terms":[202],"Our":[205],"contributions":[206],"encompass":[207],"development":[209],"innovative":[212],"integration":[218],"LLM-based":[221],"denoiser,":[222],"curation":[224],"meticulously":[227],"designed":[228],"evaluation":[229],"dataset,":[230],"thorough":[232],"experimental":[233],"validation.":[234]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2025-10-10T00:00:00"}
