{"id":"https://openalex.org/W2426521939","doi":"https://doi.org/10.1145/2882903.2882915","title":"Streaming Algorithms for Robust Distinct Elements","display_name":"Streaming Algorithms for Robust Distinct Elements","publication_year":2016,"publication_date":"2016-06-14","ids":{"openalex":"https://openalex.org/W2426521939","doi":"https://doi.org/10.1145/2882903.2882915","mag":"2426521939"},"language":"en","primary_location":{"id":"doi:10.1145/2882903.2882915","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2882903.2882915","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2882915&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=2882915&type=pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100368457","display_name":"Di Chen","orcid":"https://orcid.org/0000-0002-2332-6666"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Di Chen","raw_affiliation_strings":["HKUST, Kowloon, Hong Kong"],"affiliations":[{"raw_affiliation_string":"HKUST, Kowloon, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100418224","display_name":"Qin Zhang","orcid":"https://orcid.org/0000-0002-6851-3115"},"institutions":[{"id":"https://openalex.org/I4210119109","display_name":"Indiana University Bloomington","ror":"https://ror.org/02k40bc56","country_code":"US","type":"education","lineage":["https://openalex.org/I4210119109","https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qin Zhang","raw_affiliation_strings":["Indiana University Bloomington, Bloomington, IN, USA"],"affiliations":[{"raw_affiliation_string":"Indiana University Bloomington, Bloomington, IN, USA","institution_ids":["https://openalex.org/I4210119109"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100368457"],"corresponding_institution_ids":["https://openalex.org/I200769079"],"apc_list":null,"apc_paid":null,"fwci":1.5274,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.83452311,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1433","last_page":"1447"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7058533430099487},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4809650480747223}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7058533430099487},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4809650480747223}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/2882903.2882915","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2882903.2882915","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2882915&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 International Conference on Management of Data","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.714.6246","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.714.6246","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://homes.soic.indiana.edu/qzhangcs/papers/sigmod16-robustDE.pdf","raw_type":"text"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-77830","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-77830","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"},{"id":"pmh:oai:repository.ust.hk:1783.1-77830","is_oa":false,"landing_page_url":"http://gateway.isiknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=LinksAMR&SrcApp=PARTNER_APP&DestLinkType=FullRecord&DestApp=WOS&KeyUT=000452538600097","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference paper"}],"best_oa_location":{"id":"doi:10.1145/2882903.2882915","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2882903.2882915","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2882915&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 International Conference on Management of Data","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3102486657","display_name":null,"funder_award_id":"F-1621","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4527037313","display_name":null,"funder_award_id":"CCF-1525024","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8537104586","display_name":"Readiness through Integrative Science and Engineering: Refining and Testing a Co-Constructed Curriculum Approach with Head Start Partners","funder_award_id":"1621161","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332953","display_name":"Indiana University Bloomington","ror":"https://ror.org/02k40bc56"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2426521939.pdf","grobid_xml":"https://content.openalex.org/works/W2426521939.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W8870360","https://openalex.org/W1525949272","https://openalex.org/W1547556487","https://openalex.org/W1785933978","https://openalex.org/W1992363839","https://openalex.org/W2007025103","https://openalex.org/W2007682403","https://openalex.org/W2012833704","https://openalex.org/W2025051251","https://openalex.org/W2032475142","https://openalex.org/W2038276547","https://openalex.org/W2040615703","https://openalex.org/W2046920615","https://openalex.org/W2080745194","https://openalex.org/W2089066317","https://openalex.org/W2099480861","https://openalex.org/W2103126020","https://openalex.org/W2108598243","https://openalex.org/W2108991785","https://openalex.org/W2110411158","https://openalex.org/W2111678491","https://openalex.org/W2132069633","https://openalex.org/W2132822431","https://openalex.org/W2139276812","https://openalex.org/W2144982963","https://openalex.org/W2146420558","https://openalex.org/W2147717514","https://openalex.org/W2148885851","https://openalex.org/W2153329411","https://openalex.org/W2162006472","https://openalex.org/W4242587584","https://openalex.org/W6600367688","https://openalex.org/W6679663036"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857"],"abstract_inverted_index":{"We":[0,110],"study":[1,34],"the":[2,9,35,38,48,52,65,70,76,83,99,128,152],"problem":[3,36,84],"of":[4,72,85,130,154],"estimating":[5],"distinct":[6,73,87,146],"elements":[7,147],"in":[8,18,37,47,75,98],"data":[10,23,26,40],"stream":[11,49],"model,":[12],"which":[13],"has":[14],"a":[15,57,61,103,122],"central":[16],"role":[17],"traffic":[19],"monitoring,":[20],"query":[21],"optimization,":[22],"mining":[24],"and":[25,60,64,89,92,127],"integration.":[27],"Different":[28],"from":[29],"all":[30],"previous":[31],"work,":[32],"we":[33,81,106,135],"noisy":[39],"setting,":[41],"where":[42],"two":[43],"different":[44],"looking":[45],"items":[46],"may":[50],"reference":[51],"same":[53],"entity":[54],"(determined":[55],"by":[56,120],"distance":[58],"function":[59],"threshold":[62],"value),":[63],"goal":[66],"is":[67],"to":[68,116],"estimate":[69],"number":[71],"entities":[74],"stream.":[77],"In":[78],"this":[79],"paper,":[80],"formalize":[82],"robust":[86],"elements,":[88],"develop":[90],"space":[91],"time-efficient":[93],"streaming":[94],"algorithms":[95,140],"for":[96],"datasets":[97],"Euclidean":[100],"space,":[101],"using":[102],"novel":[104],"technique":[105],"call":[107],"bucket":[108,125],"sampling.":[109],"also":[111],"extend":[112],"our":[113,139,155],"algorithmic":[114],"framework":[115],"other":[117],"metric":[118],"spaces":[119],"establishing":[121],"connection":[123],"between":[124],"sampling":[126],"theory":[129],"locality":[131],"sensitive":[132],"hashing.":[133],"Moreover,":[134],"formally":[136],"prove":[137],"that":[138],"are":[141],"still":[142],"effective":[143],"under":[144],"small":[145],"ambiguity.":[148],"Our":[149],"experiments":[150],"demonstrate":[151],"practicality":[153],"algorithms.":[156]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
