{"id":"https://openalex.org/W4409248678","doi":"https://doi.org/10.1109/hpca61900.2025.00065","title":"NeuVSA: A Unified and Efficient Accelerator for Neural Vector Search","display_name":"NeuVSA: A Unified and Efficient Accelerator for Neural Vector Search","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248678","doi":"https://doi.org/10.1109/hpca61900.2025.00065"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00065","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00065","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054765014","display_name":"Ziming Yuan","orcid":"https://orcid.org/0009-0005-4966-3184"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziming Yuan","raw_affiliation_strings":["Institute of Computing Technology,CAS,State Key Lab of Processors"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,CAS,State Key Lab of Processors","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060490956","display_name":"Lei Dai","orcid":"https://orcid.org/0000-0002-9498-384X"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Dai","raw_affiliation_strings":["Institute of Computing Technology,CAS,State Key Lab of Processors"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,CAS,State Key Lab of Processors","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104195047","display_name":"Wen Li","orcid":null},"institutions":[{"id":"https://openalex.org/I181877577","display_name":"Shanxi University","ror":"https://ror.org/03y3e3s17","country_code":"CN","type":"education","lineage":["https://openalex.org/I181877577"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Li","raw_affiliation_strings":["Shanxi University,School of Computer and Information Technology"],"affiliations":[{"raw_affiliation_string":"Shanxi University,School of Computer and Information Technology","institution_ids":["https://openalex.org/I181877577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101711826","display_name":"Jie Zhang","orcid":"https://orcid.org/0000-0001-9803-7140"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Zhang","raw_affiliation_strings":["Peking University,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018381533","display_name":"Shengwen Liang","orcid":"https://orcid.org/0000-0001-8407-2594"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengwen Liang","raw_affiliation_strings":["Institute of Computing Technology,CAS,State Key Lab of Processors"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,CAS,State Key Lab of Processors","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100346965","display_name":"Ying Wang","orcid":"https://orcid.org/0000-0001-5172-4736"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Wang","raw_affiliation_strings":["Institute of Computing Technology,CAS,State Key Lab of Processors"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,CAS,State Key Lab of Processors","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100384309","display_name":"Cheng Liu","orcid":"https://orcid.org/0000-0002-5542-7306"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Liu","raw_affiliation_strings":["Institute of Computing Technology,CAS,State Key Lab of Processors"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,CAS,State Key Lab of Processors","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768288","display_name":"Huawei Li","orcid":"https://orcid.org/0000-0001-8082-4218"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huawei Li","raw_affiliation_strings":["Institute of Computing Technology,CAS,State Key Lab of Processors"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,CAS,State Key Lab of Processors","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023380073","display_name":"Xiaowei Li","orcid":"https://orcid.org/0000-0002-0874-814X"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowei Li","raw_affiliation_strings":["Institute of Computing Technology,CAS,State Key Lab of Processors"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,CAS,State Key Lab of Processors","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088621320","display_name":"Jiafeng Guo","orcid":"https://orcid.org/0000-0002-9509-8674"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiafeng Guo","raw_affiliation_strings":["Institute of Computing Technology, CAS,Key Lab of Network Data Science and Technology"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,Key Lab of Network Data Science and Technology","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039000647","display_name":"Peng Wang","orcid":"https://orcid.org/0000-0003-3559-8244"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Wang","raw_affiliation_strings":["Huawei Technologies Co., Ltd.,China"],"affiliations":[{"raw_affiliation_string":"Huawei Technologies Co., Ltd.,China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054084905","display_name":"Renhai Chen","orcid":"https://orcid.org/0000-0002-0233-5838"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renhai Chen","raw_affiliation_strings":["Huawei Technologies Co., Ltd.,China"],"affiliations":[{"raw_affiliation_string":"Huawei Technologies Co., Ltd.,China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100368312","display_name":"Gong Zhang","orcid":"https://orcid.org/0000-0003-0283-7050"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gong Zhang","raw_affiliation_strings":["Huawei Technologies Co., Ltd.,China"],"affiliations":[{"raw_affiliation_string":"Huawei Technologies Co., Ltd.,China","institution_ids":["https://openalex.org/I2250955327"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5054765014"],"corresponding_institution_ids":["https://openalex.org/I4210090176"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03344726,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"790","last_page":"805"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.9585999846458435,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9390000104904175,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.639227032661438},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4239307641983032},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4122200310230255}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.639227032661438},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4239307641983032},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4122200310230255}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00065","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00065","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1644552752","https://openalex.org/W1898304433","https://openalex.org/W2069870183","https://openalex.org/W2108598243","https://openalex.org/W2179513480","https://openalex.org/W2289252105","https://openalex.org/W2293155043","https://openalex.org/W2412479940","https://openalex.org/W2725159389","https://openalex.org/W2799244653","https://openalex.org/W2885195348","https://openalex.org/W2892054964","https://openalex.org/W2896457183","https://openalex.org/W2901613577","https://openalex.org/W2907492528","https://openalex.org/W2950841002","https://openalex.org/W2963469388","https://openalex.org/W2964341035","https://openalex.org/W2979602826","https://openalex.org/W2979826702","https://openalex.org/W2998702515","https://openalex.org/W3011056378","https://openalex.org/W3017228913","https://openalex.org/W3020646731","https://openalex.org/W3031273498","https://openalex.org/W3036320503","https://openalex.org/W3085011441","https://openalex.org/W3089996945","https://openalex.org/W3159727696","https://openalex.org/W3168051837","https://openalex.org/W3174809957","https://openalex.org/W3187788856","https://openalex.org/W3196481040","https://openalex.org/W3209791570","https://openalex.org/W4226143364","https://openalex.org/W4280568654","https://openalex.org/W4293024115","https://openalex.org/W4306317315","https://openalex.org/W4308083513","https://openalex.org/W4377704444","https://openalex.org/W4380874786","https://openalex.org/W4381610063","https://openalex.org/W4387212331","https://openalex.org/W4388031336","https://openalex.org/W4388757726","https://openalex.org/W4402670856","https://openalex.org/W6713134421","https://openalex.org/W6737616726","https://openalex.org/W6750615492","https://openalex.org/W6762535384","https://openalex.org/W6765119170","https://openalex.org/W6766978945","https://openalex.org/W6768817161","https://openalex.org/W6776073429","https://openalex.org/W6802167090","https://openalex.org/W6838874672","https://openalex.org/W6853131745","https://openalex.org/W7019128387"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Neural":[0],"Vector":[1],"Search":[2],"(NVS)":[3],"has":[4],"exhibited":[5],"superior":[6],"search":[7,45,59,97],"quality":[8],"over":[9,278,286,297,314,326],"traditional":[10],"key-based":[11],"strategies":[12],"for":[13,197,251,263],"information":[14],"retrieval":[15],"tasks.":[16],"An":[17],"effective":[18],"NVS":[19,37,146,168,216,222],"architecture":[20,65,154,210],"requires":[21],"high":[22,27],"recall,":[23,133],"low":[24],"latency,":[25,134],"and":[26,33,43,57,64,85,114,123,135,144,153,179,195,200,242,282,301,303,308,316],"throughput":[28,313],"to":[29,50,78,130,190,228,247,292,305],"enhance":[30],"user":[31],"experience":[32],"cost-efficiency.":[34],"However,":[35,81],"implementing":[36],"on":[38,89,151,158,219],"existing":[39,82,105],"neural":[40],"network":[41],"accelerators":[42,46,86],"vector":[44,58,96],"is":[47,226],"sub-optimal":[48],"due":[49],"the":[51,54,91,95,109,124,159,176,192,209,220,270,279],"separation":[52,113],"between":[53],"embedding":[55,92],"stage":[56,60,93],"at":[61],"both":[62,101],"algorithm":[63,152,160,169],"levels.":[66],"Fortunately,":[67],"we":[68,140,162,212],"unveil":[69],"that":[70,170,269],"Product":[71],"Quantization":[72],"(PQ)":[73],"opens":[74],"up":[75,304],"an":[76,185,243],"opportunity":[77],"break":[79],"separation.":[80],"PQ":[83,202],"algorithms":[84],"still":[87,107],"focus":[88],"either":[90],"or":[94],"stage,":[98],"rather":[99],"than":[100],"simultaneously.":[102],"Simply":[103],"combining":[104],"solutions":[106],"follows":[108],"beaten":[110],"track":[111],"of":[112,126],"suffers":[115],"from":[116],"insufficient":[117],"parallelization,":[118],"frequent":[119],"data":[120],"access":[121,181],"conflicts,":[122],"absence":[125],"scheduling,":[127],"thus":[128,204],"failing":[129],"reach":[131],"optimal":[132,193],"throughput.":[136,265],"To":[137],"this":[138],"end,":[139],"propose":[141,163],"a":[142,164,214,237,259],"unified":[143,167,221,234],"efficient":[145],"accelerator":[147,217],"dubbed":[148],"NeuVSA":[149,288,319],"based":[150,218],"co-design":[155],"philosophy.":[156],"Specifically,":[157],"level,":[161,211],"learned":[165],"PQ-based":[166],"consolidates":[171],"two":[172],"separate":[173],"stages":[174],"into":[175],"same":[177],"computing":[178],"memory":[180],"paradigm.":[182],"It":[183],"integrates":[184],"end-to-end":[186],"joint":[187,271],"training":[188,272],"strategy":[189,241,262,273,281],"learn":[191],"codebook":[194],"index":[196,239],"enhanced":[198],"recall":[199,275],"reduced":[201,252],"complexity,":[203],"achieving":[205],"smoother":[206],"acceleration.":[207],"On":[208],"customize":[213],"homogeneous":[215],"algorithm.":[223],"Each":[224],"sub-accelerator":[225],"optimized":[227],"exploit":[229],"all":[230],"parallelism":[231],"exposed":[232],"by":[233,276,284,324],"NVS,":[235],"incorporating":[236],"structured":[238],"assignment":[240],"elastic":[244],"on-chip":[245],"buffer":[246,249],"alleviate":[248],"conflicts":[250],"latency.":[253],"All":[254],"sub-accelerators":[255],"are":[256],"coordinated":[257],"using":[258],"hardware-aware":[260],"scheduling":[261],"boosted":[264],"Experimental":[266],"results":[267],"show":[268],"improves":[274],"4.6%":[277],"separated":[280],"accuracy":[283],"43.5%":[285],"LUT-NN.":[287],"achieves":[289],"$2.82":[290],"\\times$":[291,294,307,310],"$416.17":[293],"lower":[295],"latency":[296],"CPU,":[298],"GPU,":[299,317],"DFX+ANNA,":[300],"PQA+ANNA,":[302],"$49.60":[306],"$10.57":[309],"higher":[311],"average":[312],"CPU":[315],"respectively.":[318],"also":[320],"reduces":[321],"chip":[322],"area":[323],"65.2%":[325],"PQA+ANNA.":[327]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
