{"id":"https://openalex.org/W2944916577","doi":"https://doi.org/10.1145/3292500.3330825","title":"A Memory-Efficient Sketch Method for Estimating High Similarities in Streaming Sets","display_name":"A Memory-Efficient Sketch Method for Estimating High Similarities in Streaming Sets","publication_year":2019,"publication_date":"2019-07-25","ids":{"openalex":"https://openalex.org/W2944916577","doi":"https://doi.org/10.1145/3292500.3330825","mag":"2944916577"},"language":"en","primary_location":{"id":"doi:10.1145/3292500.3330825","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3330825","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1905.08977","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Pinghui Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pinghui Wang","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yiyan Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiyan Qi","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yuanming Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanming Zhang","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qiaozhu Zhai","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiaozhu Zhai","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chenxu Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenxu Wang","raw_affiliation_strings":["Xi'an Jiaotong University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Shenzhen, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":null,"display_name":"John C.S. Lui","orcid":null},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"John C.S. Lui","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xiaohong Guan","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohong Guan","raw_affiliation_strings":["Xi'an Jiaotong University &amp; Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University &amp; Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I87445476","https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":1.8392,"has_fulltext":false,"cited_by_count":39,"citation_normalized_percentile":{"value":0.88609596,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"25","last_page":"33"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.9688000082969666},{"id":"https://openalex.org/keywords/sketch","display_name":"Sketch","score":0.7490000128746033},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6316999793052673},{"id":"https://openalex.org/keywords/cardinality","display_name":"Cardinality (data modeling)","score":0.6154000163078308},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4097999930381775},{"id":"https://openalex.org/keywords/locality-sensitive-hashing","display_name":"Locality-sensitive hashing","score":0.400299996137619},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.3912000060081482},{"id":"https://openalex.org/keywords/similitude","display_name":"Similitude","score":0.3379000127315521}],"concepts":[{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.9688000082969666},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.7490000128746033},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7150999903678894},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6316999793052673},{"id":"https://openalex.org/C87117476","wikidata":"https://www.wikidata.org/wiki/Q362383","display_name":"Cardinality (data modeling)","level":2,"score":0.6154000163078308},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.45329999923706055},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4097999930381775},{"id":"https://openalex.org/C74270461","wikidata":"https://www.wikidata.org/wiki/Q1625299","display_name":"Locality-sensitive hashing","level":4,"score":0.400299996137619},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.3912000060081482},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35359999537467957},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.3379000127315521},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3253999948501587},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.28209999203681946},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2700999975204468},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2648000121116638},{"id":"https://openalex.org/C147224247","wikidata":"https://www.wikidata.org/wiki/Q885373","display_name":"Bloom filter","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.2614000141620636}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3292500.3330825","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3330825","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1905.08977","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.08977","pdf_url":"https://arxiv.org/pdf/1905.08977","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1905.08977","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.08977","pdf_url":"https://arxiv.org/pdf/1905.08977","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1506430804","https://openalex.org/W1583707981","https://openalex.org/W1999092742","https://openalex.org/W2005731313","https://openalex.org/W2008365755","https://openalex.org/W2012833704","https://openalex.org/W2025051251","https://openalex.org/W2029852131","https://openalex.org/W2053377618","https://openalex.org/W2060170830","https://openalex.org/W2064379477","https://openalex.org/W2081193615","https://openalex.org/W2120031510","https://openalex.org/W2123845384","https://openalex.org/W2126907894","https://openalex.org/W2140431670","https://openalex.org/W2144982963","https://openalex.org/W2152228468","https://openalex.org/W2162006472","https://openalex.org/W2171013708","https://openalex.org/W2418258478","https://openalex.org/W2583214467","https://openalex.org/W2604248105","https://openalex.org/W2762566515","https://openalex.org/W2765866471","https://openalex.org/W2772632044","https://openalex.org/W2785764160","https://openalex.org/W2891345706","https://openalex.org/W6604386673","https://openalex.org/W6674970736","https://openalex.org/W6679663036","https://openalex.org/W6682042839"],"related_works":[],"abstract_inverted_index":{"Estimating":[0],"set":[1],"similarity":[2,29,42,174],"and":[3,18,32,44,54,99,109,188,217,238],"detecting":[4],"highly":[5],"similar":[6],"sets":[7,31,84],"are":[8,93],"fundamental":[9],"problems":[10],"in":[11,95,135],"areas":[12],"such":[13,40],"as":[14,41,85,87],"databases,":[15],"machine":[16],"learning,":[17],"information":[19],"retrieval.":[20],"MinHash":[21,53,66,78,108,233],"is":[22,101,225],"a":[23,96,124,157,166,202,213],"well-known":[24],"technique":[25],"for":[26,37,69,160,171,183,201,205,241],"approximating":[27],"Jaccard":[28,133,173],"of":[30,63,90,150,198,215],"has":[33],"been":[34],"successfully":[35],"used":[36,200],"many":[38],"applications":[39],"search":[43],"large":[45],"scale":[46],"learning.":[47],"Its":[48],"two":[49],"compressed":[50],"versions,":[51],"b-bit":[52,107],"Odd":[55,110],"Sketch,":[56],"can":[57,79],"significantly":[58],"reduce":[59],"the":[60,64,185,190,196,206,235],"memory":[61,125,193,230],"usage":[62,194],"original":[65],"method,":[67,128],"especially":[68],"estimating":[70,242],"high":[71,243],"similarities":[72,74,134],"(i.e.,":[73,195],"around":[75],"1).":[76],"Although":[77],"be":[80],"applied":[81],"to":[82,113,130,139,155],"static":[83],"well":[86],"streaming":[88,97,116,136],"sets,":[89],"which":[91],"elements":[92],"given":[94],"fashion":[98],"cardinality":[100],"unknown":[102],"or":[103],"even":[104],"infinite,":[105],"unfortunately,":[106],"Sketch":[111],"fail":[112],"deal":[114],"with":[115,234],"data.":[117],"To":[118],"solve":[119],"this":[120],"problem,":[121],"we":[122,180],"design":[123],"efficient":[126,231],"sketch":[127,159],"MaxLogHash,":[129],"accurately":[131],"estimate":[132],"sets.":[137],"Compared":[138],"MinHash,":[140],"our":[141,222],"method":[142,223],"uses":[143],"smaller":[144],"sized":[145],"registers":[146,199],"(each":[147],"register":[148],"consists":[149],"less":[151],"than":[152,232],"7":[153],"bits)":[154],"build":[156],"compact":[158],"each":[161],"set.":[162],"We":[163,209],"also":[164],"provide":[165],"simple":[167],"yet":[168],"accurate":[169],"estimator":[170],"inferring":[172],"from":[175],"MaxLogHash":[176,203,224],"sketches.":[177],"In":[178],"addition,":[179],"derive":[181],"formulas":[182],"bounding":[184],"estimation":[186],"error":[187],"determine":[189],"smallest":[191],"necessary":[192],"number":[197],"sketch)":[204],"desired":[207],"accuracy.":[208],"conduct":[210],"experiments":[211],"on":[212],"variety":[214],"datasets,":[216],"experimental":[218],"results":[219],"show":[220],"that":[221],"about":[226],"5":[227],"times":[228],"more":[229],"same":[236],"accuracy":[237],"computational":[239],"cost":[240],"similarities.":[244]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2019-05-29T00:00:00"}
