{"id":"https://openalex.org/W4415621689","doi":"https://doi.org/10.1177/18724981251388888","title":"A comparative analysis of graph-based and partition-based approximate nearest neighbor search for large-scale entity resolution","display_name":"A comparative analysis of graph-based and partition-based approximate nearest neighbor search for large-scale entity resolution","publication_year":2025,"publication_date":"2025-10-28","ids":{"openalex":"https://openalex.org/W4415621689","doi":"https://doi.org/10.1177/18724981251388888"},"language":"en","primary_location":{"id":"doi:10.1177/18724981251388888","is_oa":false,"landing_page_url":"https://doi.org/10.1177/18724981251388888","pdf_url":null,"source":{"id":"https://openalex.org/S119727669","display_name":"Intelligent Decision Technologies","issn_l":"1872-4981","issn":["1872-4981","1875-8843"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Intelligent Decision Technologies","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021354735","display_name":"Dimitrios Karapiperis","orcid":"https://orcid.org/0000-0002-3878-5988"},"institutions":[{"id":"https://openalex.org/I183898223","display_name":"International Hellenic University","ror":"https://ror.org/00708jp83","country_code":"GR","type":"education","lineage":["https://openalex.org/I183898223"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Dimitrios Karapiperis","raw_affiliation_strings":["International Hellenic University, Thermi, Greece"],"raw_orcid":"https://orcid.org/0000-0002-3878-5988","affiliations":[{"raw_affiliation_string":"International Hellenic University, Thermi, Greece","institution_ids":["https://openalex.org/I183898223"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011200753","display_name":"Leonidas Akritidis","orcid":"https://orcid.org/0000-0001-6602-0723"},"institutions":[{"id":"https://openalex.org/I183898223","display_name":"International Hellenic University","ror":"https://ror.org/00708jp83","country_code":"GR","type":"education","lineage":["https://openalex.org/I183898223"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Leonidas Akritidis","raw_affiliation_strings":["International Hellenic University, Thermi, Greece"],"raw_orcid":"https://orcid.org/0000-0001-6602-0723","affiliations":[{"raw_affiliation_string":"International Hellenic University, Thermi, Greece","institution_ids":["https://openalex.org/I183898223"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011619520","display_name":"Panayiotis Bozanis","orcid":"https://orcid.org/0000-0001-9435-1829"},"institutions":[{"id":"https://openalex.org/I183898223","display_name":"International Hellenic University","ror":"https://ror.org/00708jp83","country_code":"GR","type":"education","lineage":["https://openalex.org/I183898223"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Panayiotis Bozanis","raw_affiliation_strings":["International Hellenic University, Thermi, Greece"],"raw_orcid":"https://orcid.org/0000-0001-9435-1829","affiliations":[{"raw_affiliation_string":"International Hellenic University, Thermi, Greece","institution_ids":["https://openalex.org/I183898223"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085113815","display_name":"Vassilios S. Verykios","orcid":"https://orcid.org/0000-0002-9758-0819"},"institutions":[{"id":"https://openalex.org/I231025917","display_name":"Hellenic Open University","ror":"https://ror.org/02kq26x23","country_code":"GR","type":"education","lineage":["https://openalex.org/I231025917"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Vassilios S Verykios","raw_affiliation_strings":["Hellenic Open University","Hellenic Open University, Patras, Greece"],"raw_orcid":"https://orcid.org/0000-0002-9758-0819","affiliations":[{"raw_affiliation_string":"Hellenic Open University","institution_ids":["https://openalex.org/I231025917"]},{"raw_affiliation_string":"Hellenic Open University, Patras, Greece","institution_ids":["https://openalex.org/I231025917"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5021354735"],"corresponding_institution_ids":["https://openalex.org/I183898223"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33477657,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"19","issue":"6","first_page":"3826","last_page":"3840"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.0007999999797903001,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.00039999998989515007,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7983999848365784},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6625000238418579},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6126000285148621},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5787000060081482},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.5436000227928162},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5230000019073486},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.49149999022483826},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4124999940395355}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7983999848365784},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7675999999046326},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6625000238418579},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6126000285148621},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5978999733924866},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5787000060081482},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.5436000227928162},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5230000019073486},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.49149999022483826},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43299999833106995},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4124999940395355},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.3970000147819519},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.39590001106262207},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.3882000148296356},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.34310001134872437},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.3425999879837036},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.325300008058548},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31779998540878296},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.3075999915599823},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C116738811","wikidata":"https://www.wikidata.org/wiki/Q608751","display_name":"Nearest neighbor search","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/18724981251388888","is_oa":false,"landing_page_url":"https://doi.org/10.1177/18724981251388888","pdf_url":null,"source":{"id":"https://openalex.org/S119727669","display_name":"Intelligent Decision Technologies","issn_l":"1872-4981","issn":["1872-4981","1875-8843"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Intelligent Decision Technologies","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2124509324","https://openalex.org/W2963469388","https://openalex.org/W2970641574","https://openalex.org/W4242744113","https://openalex.org/W4414131460","https://openalex.org/W4415124086"],"related_works":[],"abstract_inverted_index":{"The":[0],"discipline":[1],"of":[2,8,27,47,53,69,96,166,181],"Entity":[3],"Resolution":[4],"(ER),":[5],"the":[6,16,25,45,70,92,163],"process":[7],"identifying":[9],"and":[10,63,79,123,133,155,190],"linking":[11],"records":[12],"that":[13,138,176],"refer":[14],"to":[15,91],"same":[17],"real-world":[18],"entity,":[19],"has":[20],"been":[21],"fundamentally":[22],"reshaped":[23],"by":[24],"adoption":[26],"high-dimensional":[28],"vector":[29],"embeddings.":[30],"This":[31,56,169],"transformation":[32],"reframes":[33],"ER":[34,93,128],"as":[35],"a":[36,50,59,64,86,101,110,172,179],"large-scale":[37,66],"Approximate":[38],"Nearest":[39],"Neighbor":[40],"Search":[41],"(ANNS)":[42],"problem,":[43],"making":[44],"choice":[46],"ANNS":[48,73],"architecture":[49],"critical":[51],"determinant":[52],"system":[54],"performance.":[55],"paper":[57],"provides":[58,171],"deep":[60],"architectural":[61],"comparison":[62],"novel,":[65],"empirical":[67],"evaluation":[68],"two":[71,97],"dominant":[72],"paradigms:":[74],"graph-based":[75,151],"methods":[76,81,152],"(HNSW,":[77],"DiskANN)":[78],"partition-based":[80,139],"(Faiss-IVF+PQ,":[82],"Scann).":[83],"We":[84],"introduce":[85],"new":[87],"semi-synthetic":[88],"benchmark":[89],"tailored":[90],"task,":[94],"consisting":[95],"one-million-vector":[98],"datasets":[99],"with":[100],"known":[102],"ground":[103],"truth.":[104],"On":[105],"this":[106],"benchmark,":[107],"we":[108],"conduct":[109],"comprehensive":[111],"evaluation,":[112],"measuring":[113],"not":[114],"only":[115],"total":[116],"query":[117],"time":[118],"but":[119],"also":[120],"disaggregated":[121],"blocking":[122],"matching":[124,167],"times,":[125],"alongside":[126],"canonical":[127],"quality":[129],"metrics:":[130],"precision,":[131],"recall,":[132],"F1-score.":[134],"Our":[135],"findings":[136],"reveal":[137],"methods,":[140],"particularly":[141],"Scann,":[142],"offer":[143],"superior":[144,159],"performance":[145],"in":[146,178],"high-throughput,":[147],"moderate-recall":[148],"scenarios,":[149],"while":[150],"like":[153],"HNSW":[154],"DiskANN":[156],"are":[157],"unequivocally":[158],"for":[160,184],"applications":[161],"demanding":[162],"highest":[164],"levels":[165],"quality.":[168],"work":[170],"nuanced,":[173],"application-centric":[174],"analysis":[175],"culminates":[177],"set":[180],"actionable":[182],"recommendations":[183],"practitioners":[185],"designing":[186],"modern":[187],"data":[188],"integration":[189],"retrieval":[191],"systems.":[192]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-28T00:00:00"}
