{"id":"https://openalex.org/W7164825370","doi":"https://doi.org/10.1145/3805622.3810774","title":"Adaptive Multi-Agent Reasoning for Text-to-Video Retrieval","display_name":"Adaptive Multi-Agent Reasoning for Text-to-Video Retrieval","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164825370","doi":"https://doi.org/10.1145/3805622.3810774"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810774","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810774","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810774","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056908913","display_name":"Jiaxin Wu","orcid":"https://orcid.org/0000-0003-4074-3442"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]},{"id":"https://openalex.org/I182722699","display_name":"Shenzhen Polytechnic University","ror":"https://ror.org/00d2w9g53","country_code":"CN","type":"education","lineage":["https://openalex.org/I182722699"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Jiaxin Wu","raw_affiliation_strings":["School of Artificial Intelligence, Shenzhen University, Shenzhen, Guangdong, China and Department of Computing, The Hong Kong Polytechnic University, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0003-4074-3442","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Shenzhen University, Shenzhen, Guangdong, China and Department of Computing, The Hong Kong Polytechnic University, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506","https://openalex.org/I182722699","https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064374603","display_name":"Xiao-Yong Wei","orcid":"https://orcid.org/0000-0002-5706-5177"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xiao-Yong Wei","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China and Department of Computing, The Hong Kong Polytechnic University, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-5706-5177","affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China and Department of Computing, The Hong Kong Polytechnic University, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100404176","display_name":"Qing Li","orcid":"https://orcid.org/0000-0003-3370-471X"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qing Li","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0003-3370-471X","affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93595248,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1598","last_page":"1607"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9783999919891357,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9783999919891357,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.006099999882280827,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.002899999963119626,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.652899980545044},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.5565000176429749},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5120000243186951},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.4521999955177307},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.391400009393692},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.38530001044273376},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3278999924659729}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8500000238418579},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.652899980545044},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.5565000176429749},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5221999883651733},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5120000243186951},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.4521999955177307},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.391400009393692},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.38530001044273376},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3366999924182892},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3278999924659729},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.322299987077713},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.31150001287460327},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3052999973297119},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810774","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810774","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810774","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810774","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7365890145301819,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G4857298105","display_name":null,"funder_award_id":"62372314","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2062903088","https://openalex.org/W2109271964","https://openalex.org/W2753311918","https://openalex.org/W2975813532","https://openalex.org/W3033977115","https://openalex.org/W3035356601","https://openalex.org/W3093002089","https://openalex.org/W3107973541","https://openalex.org/W4210565368","https://openalex.org/W4211053420","https://openalex.org/W4225384855","https://openalex.org/W4312777110","https://openalex.org/W4399423018","https://openalex.org/W4409811348","https://openalex.org/W4411019516","https://openalex.org/W4413144376","https://openalex.org/W4413145306","https://openalex.org/W4413146621","https://openalex.org/W4413155274","https://openalex.org/W4416034825"],"related_works":[],"abstract_inverted_index":{"The":[0,84,192],"rise":[1],"of":[2,9,81],"short-form":[3],"video":[4,96],"platforms":[5],"and":[6,107,117,139,156,163,183],"the":[7,17,79],"emergence":[8],"multimodal":[10],"large":[11,95,190],"language":[12],"models":[13],"(MLLMs)":[14],"have":[15,32],"amplified":[16],"need":[18],"for":[19,91,102,113,120],"scalable,":[20],"effective,":[21],"zero-shot":[22,34,103],"text-to-video":[23],"retrieval":[24,66,89,93],"systems.":[25],"While":[26],"recent":[27],"advances":[28],"in":[29],"large-scale":[30],"pretraining":[31],"improved":[33],"cross-modal":[35],"alignment,":[36],"existing":[37],"methods":[38,187],"still":[39],"struggle":[40],"with":[41],"query-dependent":[42],"temporal":[43,105],"reasoning,":[44,106],"limiting":[45],"their":[46],"effectiveness":[47],"on":[48,78,166],"complex":[49],"queries":[50,116],"involving":[51],"temporal,":[52],"logical,":[53],"or":[54],"causal":[55],"relationships.":[56],"To":[57],"address":[58],"these":[59],"limitations,":[60],"we":[61],"propose":[62],"an":[63,132],"adaptive":[64],"multi-agent":[65],"framework":[67,85,176],"that":[68,122,152,174],"dynamically":[69,129],"orchestrates":[70],"specialized":[71],"agents":[72,127],"over":[73,94,124,181],"multiple":[74],"reasoning":[75,100,140,158],"iterations":[76],"based":[77],"demands":[80],"each":[82],"query.":[83],"includes:":[86],"(1)":[87],"a":[88,99,109,148,178,189],"agent":[90,101,112],"scalable":[92],"corpora,":[97],"(2)":[98],"contextual":[104],"(3)":[108],"query":[110],"reformulation":[111],"refining":[114],"ambiguous":[115],"recovering":[118],"performance":[119],"those":[121],"degrade":[123],"iterations.":[125],"These":[126],"are":[128],"coordinated":[130],"by":[131,188],"orchestration":[133],"agent,":[134],"which":[135],"leverages":[136],"intermediate":[137],"feedback":[138],"outcomes":[141],"to":[142,160],"guide":[143],"execution.":[144],"We":[145],"also":[146],"introduce":[147],"novel":[149],"communication":[150],"mechanism":[151],"incorporates":[153],"retrieval-performance":[154],"memory":[155],"historical":[157],"traces":[159],"improve":[161],"coordination":[162],"decision-making.":[164],"Experiments":[165],"three":[167],"TRECVid":[168],"benchmarks":[169],"spanning":[170],"eight":[171],"years":[172],"show":[173],"our":[175],"achieves":[177],"twofold":[179],"improvement":[180],"CLIP4Clip":[182],"significantly":[184],"outperforms":[185],"state-of-the-art":[186],"margin.":[191],"code":[193],"is":[194],"available":[195],"at":[196],"https://github.com/nikkiwoo-gh/multi-agent-retrieval.":[197]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
