{"id":"https://openalex.org/W7155107654","doi":"https://doi.org/10.48550/arxiv.2604.17898","title":"ReTrack: Evidence-Driven Dual-Stream Directional Anchor Calibration Network for Composed Video Retrieval","display_name":"ReTrack: Evidence-Driven Dual-Stream Directional Anchor Calibration Network for Composed Video Retrieval","publication_year":2026,"publication_date":"2026-04-20","ids":{"openalex":"https://openalex.org/W7155107654","doi":"https://doi.org/10.48550/arxiv.2604.17898"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.17898","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17898","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.17898","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134189417","display_name":"Zixu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zixu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134181244","display_name":"Yupeng Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, 
Yupeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134196302","display_name":"Zhiwei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126005311","display_name":"Qinlei Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Qinlei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109363958","display_name":"Guozhi Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Guozhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134201651","display_name":"Zhiheng Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Zhiheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134133635","display_name":"Meng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Meng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning 
Applications","score":0.8442999720573425,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8442999720573425,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.05820000171661377,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.0357000008225441,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6463000178337097},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5234000086784363},{"id":"https://openalex.org/keywords/video-retrieval","display_name":"Video 
retrieval","score":0.5159000158309937},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.5091999769210815},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4934000074863434},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48010000586509705},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.475600004196167},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4643000066280365}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019999861717224},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6463000178337097},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5234000086784363},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.5159000158309937},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.5091999769210815},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4934000074863434},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48010000586509705},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image 
retrieval","level":3,"score":0.475600004196167},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4643000066280365},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4634999930858612},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.4377000033855438},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.42570000886917114},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41620001196861267},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.397599995136261},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.35440000891685486},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.33059999346733093},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition 
(psychology)","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.29980000853538513},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.27079999446868896}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.17898","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17898","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.17898","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17898","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell 
University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,32,56,61,68,73,79,101,105,120,146,156,191,199,203,209,229],"rapid":[2],"growth":[3],"of":[4,40,47,122,133,174,194,202],"video":[5,18,29,43,91],"data,":[6],"Composed":[7,230],"Video":[8],"Retrieval":[9,232],"(CVR)":[10],"has":[11],"emerged":[12],"as":[13,50],"a":[14,36,41,45,84],"novel":[15],"paradigm":[16],"in":[17,87,169,242],"retrieval":[19,30,112,138],"and":[20,44,92,136,184,245],"is":[21,116,155],"receiving":[22],"increasing":[23],"attention":[24],"from":[25],"researchers.":[26],"Unlike":[27],"unimodal":[28],"methods,":[31],"CVR":[33,80,158,244],"task":[34],"takes":[35],"multi-modal":[37,162],"query":[38,163],"consisting":[39],"reference":[42,62,106],"piece":[46],"modification":[48,53],"text":[49,54,93],"input.":[51],"The":[52],"conveys":[55],"user's":[57],"intended":[58],"alterations":[59],"to":[60,71,99,110,119,197,213,228],"video.":[63,77],"Based":[64],"on":[65],"this":[66],"input,":[67],"model":[69],"aims":[70],"retrieve":[72],"most":[74],"relevant":[75],"target":[76],"In":[78],"task,":[81,234],"there":[82],"exists":[83],"substantial":[85],"discrepancy":[86],"information":[88],"density":[89],"between":[90],"modalities.":[94],"Traditional":[95],"composition":[96],"methods":[97],"tend":[98],"bias":[100,168,201],"composed":[102,134,170,204],"feature":[103],"toward":[104],"video,":[107],"which":[108],"leads":[109],"suboptimal":[111],"performance.":[113],"This":[114],"limitation":[115],"significant":[117],"due":[118],"presence":[121],"three":[123,175,239],"core":
[124],"challenges:":[125],"(1)":[126],"modal":[127],"contribution":[128,193],"entanglement,":[129],"(2)":[130],"explicit":[131],"optimization":[132],"features,":[135],"(3)":[137],"uncertainty.":[139],"To":[140],"address":[141],"these":[142],"challenges,":[143],"we":[144],"propose":[145],"evidence-dRivEn":[147],"dual-sTream":[148],"diRectionAl":[149],"anChor":[150],"calibration":[151],"networK":[152],"(ReTrack).":[153],"ReTrack":[154,189,224],"first":[157],"framework":[159],"that":[160,217],"improves":[161],"understanding":[164],"by":[165],"calibrating":[166],"directional":[167,200,211],"features.":[171],"It":[172,206],"consists":[173],"key":[176],"modules:":[177],"Semantic":[178],"Contribution":[179],"Disentanglement,":[180],"Composition":[181],"Geometry":[182],"Calibration,":[183],"Reliable":[185],"Evidence-driven":[186],"Alignment.":[187],"Specifically,":[188],"estimates":[190],"semantic":[192],"each":[195],"modality":[196],"calibrate":[198],"feature.":[205],"then":[207],"uses":[208],"calibrated":[210],"anchors":[212],"compute":[214],"bidirectional":[215],"evidence":[216],"drives":[218],"reliable":[219],"composed-to-target":[220],"similarity":[221],"estimation.":[222],"Moreover,":[223],"exhibits":[225],"strong":[226],"generalization":[227],"Image":[231],"(CIR)":[233],"achieving":[235],"SOTA":[236],"performance":[237],"across":[238],"benchmark":[240],"datasets":[241],"both":[243],"CIR":[246],"scenarios.":[247],"Codes":[248],"are":[249],"available":[250],"at":[251],"https://github.com/Lee-zixu/ReTrack":[252]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-22T00:00:00"}
