{"id":"https://openalex.org/W7131400681","doi":"https://doi.org/10.48550/arxiv.2602.19549","title":"Sculpting the Vector Space: Towards Efficient Multi-Vector Visual Document Retrieval via Prune-then-Merge Framework","display_name":"Sculpting the Vector Space: Towards Efficient Multi-Vector Visual Document Retrieval via Prune-then-Merge Framework","publication_year":2026,"publication_date":"2026-02-23","ids":{"openalex":"https://openalex.org/W7131400681","doi":"https://doi.org/10.48550/arxiv.2602.19549"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.19549","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19549","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.19549","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126833362","display_name":"Yibo Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yan, Yibo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007900661","display_name":"Mingdong Ou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ou, Mingdong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126795710","display_name":"Yi Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126814576","display_name":"Xin Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080566405","display_name":"Jiahao Huo","orcid":"https://orcid.org/0000-0001-6686-2576"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huo, Jiahao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126798572","display_name":"Shuliang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Shuliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126828824","display_name":"James Kwok","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwok, James","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126795722","display_name":"Xuming Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Xuming","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5126833362"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.7067999839782715,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.7067999839782715,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.18559999763965607,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.02930000051856041,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.652999997138977},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5623000264167786},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5095000267028809},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5055999755859375},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.4065999984741211},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.40059998631477356},{"id":"https://openalex.org/keywords/image-compression","display_name":"Image compression","score":0.3720000088214874},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.36809998750686646},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.36419999599456787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8611000180244446},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.652999997138977},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5623000264167786},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5478000044822693},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.510200023651123},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5095000267028809},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5055999755859375},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.4065999984741211},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4041999876499176},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.3720000088214874},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.36809998750686646},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3529999852180481},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.34869998693466187},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.325300008058548},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3116999864578247},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.30309998989105225},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.2793999910354614},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2782000005245209},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.19549","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19549","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.19549","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19549","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Visual":[0],"Document":[1],"Retrieval":[2],"(VDR),":[3],"which":[4],"aims":[5],"to":[6,82],"retrieve":[7],"relevant":[8],"pages":[9],"within":[10],"vast":[11],"corpora":[12],"of":[13,17,92],"visually-rich":[14],"documents,":[15],"is":[16],"significance":[18],"in":[19,29,113],"current":[20,39],"multimodal":[21],"retrieval":[22],"applications.":[23],"The":[24],"state-of-the-art":[25],"multi-vector":[26],"paradigm":[27],"excels":[28],"performance":[30,139],"but":[31],"suffers":[32],"from":[33],"prohibitive":[34],"overhead,":[35],"a":[36,49,65,88,95],"problem":[37],"that":[38,69,123],"efficiency":[40],"methods":[41],"like":[42],"pruning":[43,80],"and":[44,55,136],"merging":[45,97],"address":[46],"imperfectly,":[47],"creating":[48,87],"difficult":[50],"trade-off":[51],"between":[52],"compression":[53,134,142],"rate":[54],"feature":[56,110],"fidelity.":[57],"To":[58],"overcome":[59],"this":[60,100],"dilemma,":[61],"we":[62],"introduce":[63],"Prune-then-Merge,":[64],"novel":[66],"two-stage":[67],"framework":[68,125],"synergizes":[70],"these":[71],"complementary":[72],"approaches.":[73],"Our":[74],"method":[75],"first":[76],"employs":[77],"an":[78],"adaptive":[79],"stage":[81,98],"filter":[83],"out":[84],"low-information":[85],"patches,":[86],"refined,":[89],"high-signal":[90],"set":[91],"embeddings.":[93],"Subsequently,":[94],"hierarchical":[96],"compresses":[99],"pre-filtered":[101],"set,":[102],"effectively":[103],"summarizing":[104],"semantic":[105],"content":[106],"without":[107],"the":[108,132],"noise-induced":[109],"dilution":[111],"seen":[112],"single-stage":[114],"methods.":[115],"Extensive":[116],"experiments":[117],"on":[118],"29":[119],"VDR":[120],"datasets":[121],"demonstrate":[122],"our":[124],"consistently":[126],"outperforms":[127],"existing":[128],"methods,":[129],"significantly":[130],"extending":[131],"near-lossless":[133],"range":[135],"providing":[137],"robust":[138],"at":[140],"high":[141],"ratios.":[143]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
