{"id":"https://openalex.org/W4414427909","doi":"https://doi.org/10.1145/3749189","title":"VecFlow: A High-Performance Vector Data Management System for Filtered-Search on GPUs","display_name":"VecFlow: A High-Performance Vector Data Management System for Filtered-Search on GPUs","publication_year":2025,"publication_date":"2025-09-22","ids":{"openalex":"https://openalex.org/W4414427909","doi":"https://doi.org/10.1145/3749189"},"language":"en","primary_location":{"id":"doi:10.1145/3749189","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3749189","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3749189","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119702701","display_name":"Jingyi Xi","orcid":"https://orcid.org/0009-0008-9425-0655"},"institutions":[{"id":"https://openalex.org/I183874917","display_name":"Urbana University","ror":"https://ror.org/04kp3hw27","country_code":"US","type":"education","lineage":["https://openalex.org/I183874917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jingyi Xi","raw_affiliation_strings":["SSAIL Lab, UIUC, Urbana, USA"],"raw_orcid":"https://orcid.org/0009-0008-9425-0655","affiliations":[{"raw_affiliation_string":"SSAIL Lab, UIUC, Urbana, USA","institution_ids":["https://openalex.org/I183874917"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065967981","display_name":"Chenghao Mo","orcid":"https://orcid.org/0009-0003-9860-6325"},"institutions":[{"id":"https://openalex.org/I183874917","display_name":"Urbana University","ror":"https://ror.org/04kp3hw27","country_code":"US","type":"education","lineage":["https://openalex.org/I183874917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chenghao Mo","raw_affiliation_strings":["SSAIL Lab, UIUC, Urbana, USA"],"raw_orcid":"https://orcid.org/0009-0003-9860-6325","affiliations":[{"raw_affiliation_string":"SSAIL Lab, UIUC, Urbana, USA","institution_ids":["https://openalex.org/I183874917"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057388897","display_name":"Ben Karsin","orcid":"https://orcid.org/0000-0003-4011-8903"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Ben Karsin","raw_affiliation_strings":["Nvidia, Honolulu, USA"],"raw_orcid":"https://orcid.org/0000-0003-4011-8903","affiliations":[{"raw_affiliation_string":"Nvidia, Honolulu, USA","institution_ids":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089442943","display_name":"Artem Chirkin","orcid":"https://orcid.org/0000-0001-6936-2040"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Artem Chirkin","raw_affiliation_strings":["Nvidia, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0001-6936-2040","affiliations":[{"raw_affiliation_string":"Nvidia, Zurich, Switzerland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103208235","display_name":"Mingqin Li","orcid":"https://orcid.org/0009-0002-0270-9489"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingqin Li","raw_affiliation_strings":["Microsoft, Redmond, USA"],"raw_orcid":"https://orcid.org/0009-0002-0270-9489","affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077768924","display_name":"Minjia Zhang","orcid":"https://orcid.org/0000-0002-8165-166X"},"institutions":[{"id":"https://openalex.org/I183874917","display_name":"Urbana University","ror":"https://ror.org/04kp3hw27","country_code":"US","type":"education","lineage":["https://openalex.org/I183874917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Minjia Zhang","raw_affiliation_strings":["SSAIL Lab, UIUC, Urbana, USA"],"raw_orcid":"https://orcid.org/0000-0002-8165-166X","affiliations":[{"raw_affiliation_string":"SSAIL Lab, UIUC, Urbana, USA","institution_ids":["https://openalex.org/I183874917"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1204768,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"3","issue":"4","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.652999997138977},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5278000235557556},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4296000003814697},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4198000133037567},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.40860000252723694},{"id":"https://openalex.org/keywords/batch-processing","display_name":"Batch processing","score":0.3779999911785126},{"id":"https://openalex.org/keywords/vector-processor","display_name":"Vector processor","score":0.37459999322891235},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.3492000102996826},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.3434999883174896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8291000127792358},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.652999997138977},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5278000235557556},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5194000005722046},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4296000003814697},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4198000133037567},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41679999232292175},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.40860000252723694},{"id":"https://openalex.org/C172658912","wikidata":"https://www.wikidata.org/wiki/Q661613","display_name":"Batch processing","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C161824985","wikidata":"https://www.wikidata.org/wiki/Q919509","display_name":"Vector processor","level":2,"score":0.37459999322891235},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.3492000102996826},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3434999883174896},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3359000086784363},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3068999946117401},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2793999910354614},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2743000090122223},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.27000001072883606},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C19012869","wikidata":"https://www.wikidata.org/wiki/Q578372","display_name":"Response time","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3749189","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3749189","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3749189","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3749189","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2080234606","https://openalex.org/W2110026675","https://openalex.org/W2133995768","https://openalex.org/W2136189984","https://openalex.org/W2151135734","https://openalex.org/W2165558283","https://openalex.org/W2250384498","https://openalex.org/W2560674852","https://openalex.org/W2906958661","https://openalex.org/W2914304175","https://openalex.org/W2963265099","https://openalex.org/W2982036057","https://openalex.org/W2992678377","https://openalex.org/W3029865833","https://openalex.org/W3085011441","https://openalex.org/W4230435573","https://openalex.org/W4301953218","https://openalex.org/W4367046898","https://openalex.org/W4390962868","https://openalex.org/W4396758529","https://openalex.org/W4399174383","https://openalex.org/W4400641571","https://openalex.org/W4404181049"],"related_works":[],"abstract_inverted_index":{"Vector":[0],"search":[1,42,84,108],"and":[2,58,91,107,138,142,144],"database":[3],"systems":[4],"have":[5],"become":[6],"a":[7,79,103],"keystone":[8],"component":[9],"in":[10],"many":[11,15],"AI":[12,30],"applications.":[13],"While":[14],"prior":[16],"research":[17],"has":[18],"investigated":[19],"how":[20],"to":[21,121,178,188],"accelerate":[22],"the":[23,67,113],"performance":[24,60],"of":[25,61,66,115],"generic":[26],"vector":[27,36,41,82],"search,":[28],"emerging":[29],"applications":[31],"require":[32],"running":[33],"more":[34],"sophisticated":[35],"queries":[37],"efficiently,":[38],"such":[39,173],"as":[40,174],"with":[43,55,117],"attribute":[44],"filters.":[45,118],"Unfortunately,":[46],"recent":[47],"filtered-ANNS":[48,62,98],"solutions":[49,172],"are":[50],"primarily":[51],"designed":[52],"for":[53,97,129,166],"CPUs,":[54],"few":[56],"exploration":[57],"limited":[59],"that":[63,86,110,160],"take":[64],"advantage":[65],"massive":[68],"parallelism":[69],"offered":[70],"by":[71,176],"GPUs.":[72,100],"In":[73,119],"this":[74],"paper,":[75],"we":[76,125],"present":[77],"VecFlow,":[78],"novel":[80,104],"high-performance":[81],"filtered":[83],"system":[85],"achieves":[87,162],"unprecedented":[88],"high":[89,189],"throughput":[90],"recall":[92,167,190],"while":[93],"obtaining":[94],"low":[95],"latency":[96],"on":[99,150],"We":[101],"propose":[102],"label-centric":[105],"indexing":[106],"algorithm":[109],"significantly":[111],"improves":[112],"selectivity":[114],"ANNS":[116],"addition":[120],"algorithmic":[122],"level":[123],"optimization,":[124],"provide":[126],"architecture-aware":[127],"optimizations":[128],"VecFlow's":[130],"functional":[131],"modules,":[132],"effectively":[133],"supporting":[134],"both":[135],"small":[136],"batch":[137,140],"large":[139],"queries,":[141],"single-label":[143],"multi-label":[145],"query":[146],"processing.":[147],"Experimental":[148],"results":[149],"NVIDIA":[151],"A100":[152],"GPU":[153],"over":[154],"several":[155],"public":[156],"available":[157],"datasets":[158],"validate":[159],"VecFlow":[161,182],"5":[163],"million":[164],"QPS":[165],"90%,":[168],"outperforming":[169],"state-of-the-art":[170],"CPU-based":[171],"Filtered-DiskANN":[175],"up":[177],"135":[179],"times.":[180],"Alternatively,":[181],"can":[183],"easily":[184],"extend":[185],"its":[186],"support":[187],"99%":[191],"regime,":[192],"whereas":[193],"strong":[194],"GPU-based":[195],"baselines":[196],"plateau":[197],"at":[198],"around":[199],"80%":[200],"recall.":[201]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
