{"id":"https://openalex.org/W2566913073","doi":"https://doi.org/10.1109/tkde.2016.2638838","title":"A Generic Method for Accelerating LSH-Based Similarity Join Processing","display_name":"A Generic Method for Accelerating LSH-Based Similarity Join Processing","publication_year":2016,"publication_date":"2016-12-13","ids":{"openalex":"https://openalex.org/W2566913073","doi":"https://doi.org/10.1109/tkde.2016.2638838","mag":"2566913073"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2016.2638838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2016.2638838","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050042433","display_name":"Chenyun Yu","orcid":"https://orcid.org/0000-0001-9774-1590"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Chenyun Yu","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071030042","display_name":"Sarana Nutanong","orcid":"https://orcid.org/0000-0003-1068-850X"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Sarana Nutanong","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100724756","display_name":"Hangyu Li","orcid":"https://orcid.org/0000-0002-9659-3634"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Hangyu Li","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100390514","display_name":"Cong Wang","orcid":"https://orcid.org/0000-0003-0547-315X"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Cong Wang","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064553444","display_name":"Xingliang Yuan","orcid":"https://orcid.org/0000-0002-3701-4946"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xingliang Yuan","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Kowloon Tong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5050042433"],"corresponding_institution_ids":["https://openalex.org/I168719708"],"apc_list":null,"apc_paid":null,"fwci":1.503,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.88620001,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"29","issue":"4","first_page":"712","last_page":"726"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.972599983215332,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9692999720573425,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/locality-sensitive-hashing","display_name":"Locality-sensitive hashing","score":0.9120196104049683},{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.841759443283081},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8113892674446106},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5899293422698975},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5444546341896057},{"id":"https://openalex.org/keywords/euclidean-distance","display_name":"Euclidean distance","score":0.535988450050354},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.5178163647651672},{"id":"https://openalex.org/keywords/join","display_name":"Join (topology)","score":0.5111151337623596},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.49132004380226135},{"id":"https://openalex.org/keywords/nearest-neighbor-search","display_name":"Nearest neighbor search","score":0.4598333537578583},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4410397708415985},{"id":"https://openalex.org/keywords/hash-table","display_name":"Hash table","score":0.41396641731262207},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3731226921081543},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.35946953296661377},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.20035162568092346},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15132051706314087},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11308860778808594}],"concepts":[{"id":"https://openalex.org/C74270461","wikidata":"https://www.wikidata.org/wiki/Q1625299","display_name":"Locality-sensitive hashing","level":4,"score":0.9120196104049683},{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.841759443283081},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8113892674446106},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5899293422698975},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5444546341896057},{"id":"https://openalex.org/C120174047","wikidata":"https://www.wikidata.org/wiki/Q847073","display_name":"Euclidean distance","level":2,"score":0.535988450050354},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5178163647651672},{"id":"https://openalex.org/C2776124973","wikidata":"https://www.wikidata.org/wiki/Q3183033","display_name":"Join (topology)","level":2,"score":0.5111151337623596},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.49132004380226135},{"id":"https://openalex.org/C116738811","wikidata":"https://www.wikidata.org/wiki/Q608751","display_name":"Nearest neighbor search","level":2,"score":0.4598333537578583},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4410397708415985},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.41396641731262207},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3731226921081543},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35946953296661377},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.20035162568092346},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15132051706314087},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11308860778808594},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2016.2638838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2016.2638838","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1413903985","display_name":null,"funder_award_id":"7200387","funder_id":"https://openalex.org/F4320309893","funder_display_name":"City University of Hong Kong"},{"id":"https://openalex.org/G4952193985","display_name":null,"funder_award_id":"ITS/307/15","funder_id":"https://openalex.org/F4320321920","funder_display_name":"Innovation and Technology Commission"}],"funders":[{"id":"https://openalex.org/F4320309893","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23"},{"id":"https://openalex.org/F4320321920","display_name":"Innovation and Technology Commission","ror":"https://ror.org/04vf9tr09"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W642889137","https://openalex.org/W1502916507","https://openalex.org/W1526840576","https://openalex.org/W1573493856","https://openalex.org/W1736726159","https://openalex.org/W1976422444","https://openalex.org/W1996760769","https://openalex.org/W2011039300","https://openalex.org/W2011806775","https://openalex.org/W2013262694","https://openalex.org/W2033201131","https://openalex.org/W2042160423","https://openalex.org/W2045848492","https://openalex.org/W2058903936","https://openalex.org/W2071572981","https://openalex.org/W2080844740","https://openalex.org/W2091133510","https://openalex.org/W2091148532","https://openalex.org/W2103012681","https://openalex.org/W2107427524","https://openalex.org/W2110026675","https://openalex.org/W2115500858","https://openalex.org/W2144265691","https://openalex.org/W2147717514","https://openalex.org/W2157092487","https://openalex.org/W2161936973","https://openalex.org/W2162006472","https://openalex.org/W2170037597","https://openalex.org/W2205071250","https://openalex.org/W2231297807","https://openalex.org/W2294518132","https://openalex.org/W2317953821","https://openalex.org/W2561380899","https://openalex.org/W2953019775","https://openalex.org/W3004538570","https://openalex.org/W4233413206","https://openalex.org/W4300601563","https://openalex.org/W6629956336","https://openalex.org/W6681117370","https://openalex.org/W6682969285","https://openalex.org/W6683401941","https://openalex.org/W6685012337","https://openalex.org/W6843793546"],"related_works":["https://openalex.org/W2911483473","https://openalex.org/W4401133510","https://openalex.org/W2885569103","https://openalex.org/W3094967175","https://openalex.org/W2166822184","https://openalex.org/W2135779989","https://openalex.org/W3083090961","https://openalex.org/W2045263322","https://openalex.org/W2754607325","https://openalex.org/W2901290148"],"abstract_inverted_index":{"Locality":[0],"sensitive":[1],"hashing":[2],"(LSH)":[3],"is":[4],"an":[5],"efficient":[6],"method":[7,57,74,99,130,177],"for":[8,119,143,194],"solving":[9],"the":[10,29,36,47,61,77,88,103,110,126,134,147,200],"problem":[11],"of":[12,38,49,63,72,83,90,105,128,175,199],"approximate":[13],"similarity":[14,23,152],"search":[15],"in":[16,28,76],"highdimensional":[17],"spaces.":[18],"Through":[19],"LSH,":[20],"a":[21,55,81],"high-dimensional":[22],"join":[24],"can":[25,100,137],"be":[26,138],"processed":[27],"same":[30,111,135],"way":[31],"as":[32],"hash":[33],"join,":[34],"making":[35],"cost":[37],"joining":[39,64],"two":[40,65],"large":[41,66],"datasets":[42,67,166],"linear.":[43],"By":[44],"judicially":[45],"analyzing":[46],"properties":[48],"multiple":[50],"LSH":[51,91,117,141,180,192],"algorithms,":[52],"we":[53,79,124,187],"propose":[54],"generic":[56],"to":[58,86,115,140,182,190],"speed":[59],"up":[60],"process":[62],"using":[68,164],"LSH.":[69],"The":[70],"crux":[71],"our":[73,97,129,168,176],"lies":[75],"waywhich":[78],"identify":[80],"set":[82],"representative":[84],"points":[85],"reduce":[87,102],"number":[89,104],"lookups.":[92],"Theoretical":[93],"analyzes":[94,170],"show":[95,172],"that":[96,133],"proposed":[98],"greatly":[101],"lookup":[106],"operations":[107],"and":[108,155,171],"retain":[109],"result":[112],"accuracy":[113],"compared":[114],"executing":[116],"lookups":[118,193],"every":[120],"query":[121,201],"point.":[122],"Furthermore,":[123],"demonstrate":[125],"generality":[127],"by":[131],"showing":[132],"principle":[136],"applied":[139],"algorithms":[142],"three":[144],"different":[145],"metrics:":[146],"Euclidean":[148],"distance":[149,157],"(QALSH),":[150],"Jaccard":[151],"measure":[153],"(MinHash),":[154],"Hamming":[156],"(sequence":[158],"hashing).":[159],"Results":[160],"from":[161],"experimental":[162],"studies":[163],"real":[165],"confirm":[167],"error":[169],"significant":[173],"improvements":[174],"overthe":[178],"state-of-the-art":[179],"method:":[181],"achieve":[183],"over":[184],"0.95":[185],"recall,":[186],"only":[188],"need":[189],"operate":[191],"at":[195],"most":[196],"15":[197],"percent":[198],"points.":[202]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
