{"id":"https://openalex.org/W4399344197","doi":"https://doi.org/10.1109/tkde.2024.3408815","title":"Optimizing the Number of Clusters for Billion-Scale Quantization-Based Nearest Neighbor Search","display_name":"Optimizing the Number of Clusters for Billion-Scale Quantization-Based Nearest Neighbor Search","publication_year":2024,"publication_date":"2024-06-04","ids":{"openalex":"https://openalex.org/W4399344197","doi":"https://doi.org/10.1109/tkde.2024.3408815"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2024.3408815","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2024.3408815","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017240878","display_name":"Yujian Fu","orcid":"https://orcid.org/0009-0006-9256-295X"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yujian Fu","raw_affiliation_strings":["School of Computing, National University of Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0006-9256-295X","affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010327546","display_name":"Cheng Chen","orcid":"https://orcid.org/0000-0002-2622-4075"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng Chen","raw_affiliation_strings":["ByteDance Inc., Beijing, China","ByteDance Inc"],"raw_orcid":"https://orcid.org/0000-0002-2622-4075","affiliations":[{"raw_affiliation_string":"ByteDance Inc., Beijing, China","institution_ids":[]},{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiaohui Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaohui Chen","raw_affiliation_strings":["ByteDance Inc., Beijing, China","ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc., Beijing, China","institution_ids":[]},{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023989495","display_name":"Weng\u2010Fai Wong","orcid":"https://orcid.org/0000-0002-4281-2053"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Weng-Fai Wong","raw_affiliation_strings":["School of Computing, National University of Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-4281-2053","affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039946576","display_name":"Bingsheng He","orcid":"https://orcid.org/0000-0001-8618-4581"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Bingsheng He","raw_affiliation_strings":["School of Computing, National University of Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0001-8618-4581","affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.14,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.87932588,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"36","issue":"11","first_page":"6786","last_page":"6800"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9686999917030334,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9686999917030334,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.953499972820282,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9136000275611877,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nearest-neighbor-search","display_name":"Nearest neighbor search","score":0.7371084690093994},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.661935031414032},{"id":"https://openalex.org/keywords/best-bin-first","display_name":"Best bin first","score":0.6502176523208618},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.6342482566833496},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.45894327759742737},{"id":"https://openalex.org/keywords/cover-tree","display_name":"Cover tree","score":0.44691216945648193},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.42178255319595337},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3657902777194977},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29954543709754944},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.2545172870159149},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07835900783538818}],"concepts":[{"id":"https://openalex.org/C116738811","wikidata":"https://www.wikidata.org/wiki/Q608751","display_name":"Nearest neighbor search","level":2,"score":0.7371084690093994},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.661935031414032},{"id":"https://openalex.org/C161986146","wikidata":"https://www.wikidata.org/wiki/Q4896845","display_name":"Best bin first","level":3,"score":0.6502176523208618},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.6342482566833496},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.45894327759742737},{"id":"https://openalex.org/C53661774","wikidata":"https://www.wikidata.org/wiki/Q13108095","display_name":"Cover tree","level":5,"score":0.44691216945648193},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.42178255319595337},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3657902777194977},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29954543709754944},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.2545172870159149},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07835900783538818},{"id":"https://openalex.org/C104047586","wikidata":"https://www.wikidata.org/wiki/Q5033439","display_name":"Canopy clustering algorithm","level":4,"score":0.0},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2024.3408815","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2024.3408815","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320320698","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W580280495","https://openalex.org/W642889137","https://openalex.org/W1627400044","https://openalex.org/W1888005072","https://openalex.org/W2086179657","https://openalex.org/W2090212574","https://openalex.org/W2111006384","https://openalex.org/W2117281615","https://openalex.org/W2118371392","https://openalex.org/W2119913432","https://openalex.org/W2124509324","https://openalex.org/W2128017662","https://openalex.org/W2132234208","https://openalex.org/W2133542527","https://openalex.org/W2133995768","https://openalex.org/W2147717514","https://openalex.org/W2165489143","https://openalex.org/W2165558283","https://openalex.org/W2171572695","https://openalex.org/W2294518132","https://openalex.org/W2300352888","https://openalex.org/W2427312773","https://openalex.org/W2511638354","https://openalex.org/W2513636497","https://openalex.org/W2757662681","https://openalex.org/W2772923331","https://openalex.org/W2797054769","https://openalex.org/W2913688486","https://openalex.org/W2963213349","https://openalex.org/W2963265099","https://openalex.org/W2963284996","https://openalex.org/W2963469388","https://openalex.org/W2998241033","https://openalex.org/W3029693508","https://openalex.org/W3037277842","https://openalex.org/W3094858795","https://openalex.org/W4381610063","https://openalex.org/W6635035540","https://openalex.org/W6777399232","https://openalex.org/W6786109342"],"related_works":["https://openalex.org/W4246757943","https://openalex.org/W1558159560","https://openalex.org/W2182477562","https://openalex.org/W1595303882","https://openalex.org/W2902799860","https://openalex.org/W4289129280","https://openalex.org/W3096071782","https://openalex.org/W2375128115","https://openalex.org/W325985789","https://openalex.org/W2119808169"],"abstract_inverted_index":{"Approximate":[0],"nearest":[1],"neighbor":[2],"search":[3,83,117],"(ANNS)":[4],"is":[5,214],"crucial":[6],"in":[7,53,136,170],"various":[8],"real-world":[9],"applications,":[10],"including":[11],"recommendation":[12],"systems,":[13,55],"data":[14,51],"mining,":[15],"and":[16,70,203],"image":[17],"retrieval.":[18],"To":[19],"date,":[20],"quantization-based":[21,54,112,137,171,201],"algorithms":[22],"have":[23],"emerged":[24],"as":[25,129],"one":[26],"of":[27,40,44,64,73,108,155,161,183],"the":[28,38,41,71,87,105,130,146,153,159,165,181,185],"most":[29],"efficient":[30],"solutions":[31],"for":[32,49,76,110,124,132],"ANNS":[33,113,147,172],"on":[34,193],"billion-scale":[35,194,220],"datasets.":[36,221],"However,":[37],"determination":[39],"optimal":[42,106],"number":[43,107,154,182],"clusters,":[45,184],"a":[46,99],"critical":[47],"factor":[48],"peak":[50],"performance":[52,84,192],"remains":[56],"inadequately":[57],"explored.":[58],"Previous":[59],"works":[60],"often":[61],"propose":[62,120],"numbers":[63,135],"clusters":[65,109],"that":[66,102],"are":[67],"not":[68],"optimal,":[69],"absence":[72],"effective":[74],"methodologies":[75],"tuning":[77],"this":[78,94,96],"parameter":[79],"leads":[80],"to":[81,86,93,115,145,198,219],"suboptimal":[82],"due":[85],"vast":[88],"configuration":[89],"space.":[90],"In":[91],"response":[92],"challenge,":[95],"paper":[97],"introduces":[98],"novel":[100],"algorithm":[101,140,213],"automatically":[103],"identifies":[104],"billion-scale,":[111],"systems":[114],"maximize":[116],"efficiency.":[118],"We":[119,157],"an":[121],"analytical":[122],"model":[123],"evaluating":[125],"retrieval":[126,191],"performance,":[127],"serving":[128],"benchmark":[131],"optimizing":[133,180],"cluster":[134],"indexes.":[138],"Our":[139,174],"applies":[141],"iterative":[142],"local":[143],"adjustments":[144],"index":[148,168,188],"being":[149],"constructed,":[150],"progressively":[151],"refining":[152],"clusters.":[156],"demonstrate":[158],"efficacy":[160],"our":[162,211],"approach":[163],"using":[164],"popular":[166],"inverted":[167,187],"structure":[169],"systems.":[173],"findings":[175],"indicate":[176],"that:":[177],"(1)":[178],"By":[179],"vanilla":[186],"exhibits":[189],"improved":[190],"datasets":[195],"when":[196,217],"compared":[197],"existing":[199],"state-of-the-art":[200],"methods;":[202],"(2)":[204],"The":[205],"additional":[206],"computational":[207],"overhead":[208],"introduced":[209],"by":[210],"optimization":[212],"minimal,":[215],"even":[216],"applied":[218]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
