{"id":"https://openalex.org/W2066588467","doi":"https://doi.org/10.1145/1386118.1386121","title":"Sketches for size of join estimation","display_name":"Sketches for size of join estimation","publication_year":2008,"publication_date":"2008-08-01","ids":{"openalex":"https://openalex.org/W2066588467","doi":"https://doi.org/10.1145/1386118.1386121","mag":"2066588467"},"language":"en","primary_location":{"id":"doi:10.1145/1386118.1386121","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1386118.1386121","pdf_url":null,"source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065838987","display_name":"Florin Rusu","orcid":"https://orcid.org/0000-0002-7018-9043"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Florin Rusu","raw_affiliation_strings":["University of Florida, Gainesville"],"affiliations":[{"raw_affiliation_string":"University of Florida, Gainesville","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008335723","display_name":"Alin Dobra","orcid":"https://orcid.org/0000-0003-2033-9952"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alin Dobra","raw_affiliation_strings":["University of Florida, Gainesville"],"affiliations":[{"raw_affiliation_string":"University of Florida, Gainesville","institution_ids":["https://openalex.org/I33213144"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5065838987"],"corresponding_institution_ids":["https://openalex.org/I33213144"],"apc_list":null,"apc_paid":null,"fwci":1.906,"has_fulltext":false,"cited_by_count":67,"citation_normalized_percentile":{"value":0.87234202,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"33","issue":"3","first_page":"1","last_page":"46"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8655611276626587},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.5762732625007629},{"id":"https://openalex.org/keywords/moment","display_name":"Moment (physics)","score":0.574939489364624},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.5747393369674683},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5683563947677612},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4489808976650238},{"id":"https://openalex.org/keywords/point-estimation","display_name":"Point estimation","score":0.44131651520729065},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4324130415916443},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3753935992717743},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.11799928545951843},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11256518959999084}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8655611276626587},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.5762732625007629},{"id":"https://openalex.org/C179254644","wikidata":"https://www.wikidata.org/wiki/Q13222844","display_name":"Moment (physics)","level":2,"score":0.574939489364624},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5747393369674683},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5683563947677612},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4489808976650238},{"id":"https://openalex.org/C41426520","wikidata":"https://www.wikidata.org/wiki/Q1192065","display_name":"Point estimation","level":2,"score":0.44131651520729065},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4324130415916443},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3753935992717743},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11799928545951843},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11256518959999084},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1386118.1386121","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1386118.1386121","pdf_url":null,"source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6494893801","display_name":null,"funder_award_id":"NSF-CAREER-IIS-0448264","funder_id":"https://openalex.org/F4320337389","funder_display_name":"Division of Information and Intelligent Systems"}],"funders":[{"id":"https://openalex.org/F4320337389","display_name":"Division of Information and Intelligent Systems","ror":"https://ror.org/053a2cp42"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W151938044","https://openalex.org/W1493892051","https://openalex.org/W1500657154","https://openalex.org/W1599329921","https://openalex.org/W1845838099","https://openalex.org/W1964034873","https://openalex.org/W1998435329","https://openalex.org/W2010016018","https://openalex.org/W2013228841","https://openalex.org/W2020147322","https://openalex.org/W2026784425","https://openalex.org/W2040063291","https://openalex.org/W2041500280","https://openalex.org/W2064379477","https://openalex.org/W2080234606","https://openalex.org/W2107443258","https://openalex.org/W2110100895","https://openalex.org/W2119796071","https://openalex.org/W2139433757","https://openalex.org/W2147874260","https://openalex.org/W2148706674","https://openalex.org/W2153406069","https://openalex.org/W2168216703","https://openalex.org/W2295428206","https://openalex.org/W2314731117","https://openalex.org/W2913507641","https://openalex.org/W4229903866","https://openalex.org/W4231916799","https://openalex.org/W4235166168","https://openalex.org/W4251258520","https://openalex.org/W6629496199","https://openalex.org/W6831712467","https://openalex.org/W7075632795"],"related_works":["https://openalex.org/W4231775656","https://openalex.org/W2046435967","https://openalex.org/W2485872624","https://openalex.org/W2383646825","https://openalex.org/W2371018915","https://openalex.org/W2354191502","https://openalex.org/W1972225038","https://openalex.org/W2023578311","https://openalex.org/W3134658850","https://openalex.org/W2101259467"],"abstract_inverted_index":{"Sketching":[0],"techniques":[1,97,118,185,214],"provide":[2,158],"approximate":[3],"answers":[4],"to":[5,48,188,216,243,261,290],"aggregate":[6],"queries":[7],"both":[8,24],"for":[9,23,31,56,232,265,276,340],"data-streaming":[10],"and":[11,37,73,140,156,257,281,335],"distributed":[12],"computation.":[13],"Small":[14],"space":[15],"summaries":[16],"that":[17,52,135,150,160,180,228,324],"have":[18],"linearity":[19],"properties":[20,134],"are":[21,53,75,161,215],"required":[22],"types":[25],"of":[26,67,70,79,82,127,132,152,166,206,224,236,312],"applications.":[27],"The":[28,200,221,300,320],"prevalent":[29],"method":[30,44],"analyzing":[32],"sketches":[33,72,83,230],"uses":[34],"moment":[35],"analysis":[36,148,192],"distribution-independent":[38],"bounds":[39,51,63],"based":[40],"on":[41],"moments.":[42],"This":[43],"produces":[45],"clean,":[46],"easy":[47],"interpret,":[49],"theoretical":[50,62,172],"especially":[54],"useful":[55],"deriving":[57],"asymptotic":[58],"results.":[59],"However,":[60],"the":[61,68,103,116,121,130,137,146,153,170,182,190,194,203,213,225,233,239,244,250,254,258,272,291,316,332],"obscure":[64],"fine":[65],"details":[66],"behavior":[69,139],"various":[71,95,207],"they":[74],"mostly":[76],"not":[77],"indicative":[78],"which":[80,101,209],"type":[81],"should":[84],"be":[85,217,285,306],"used":[86,218],"in":[87,120,162,186,298,302],"practice.":[88],"Moreover,":[89],"no":[90],"significant":[91,296],"empirical":[92,178,321],"comparison":[93],"between":[94],"sketching":[96,117,184,246,266,277],"has":[98],"been":[99],"published,":[100],"makes":[102],"choice":[104],"even":[105],"harder.":[106],"In":[107],"this":[108],"article":[109],"we":[110,196],"take":[111],"a":[112,124,295],"close":[113,242],"look":[114],"at":[115],"proposed":[119],"literature":[122],"from":[123,198,253],"statistical":[125,147,191,255],"point":[126],"view":[128],"with":[129,193,288],"goal":[131],"determining":[133],"indicate":[136],"actual":[138],"producing":[141],"tighter":[142],"confidence":[143],"bounds.":[144],"Interestingly,":[145],"reveals":[149],"two":[151,273,310],"techniques,":[154],"Fast-AGMS":[155,229],"Count-Min,":[157],"results":[159,260],"some":[163],"cases":[164],"orders":[165,311],"magnitude":[167],"better":[168,341],"than":[169],"corresponding":[171],"predictions.":[173],"We":[174,248,269],"conduct":[175],"an":[176],"extensive":[177],"study":[179,201,226,256,322],"compares":[181],"different":[183],"order":[187],"corroborate":[189],"conclusions":[195],"draw":[197],"it.":[199],"indicates":[202],"expected":[204],"performance":[205],"sketches,":[208],"is":[210,227,326,331,338],"crucial":[211],"if":[212],"by":[219],"practitioners.":[220],"overall":[222],"conclusion":[223],"are,":[231],"full":[234],"spectrum":[235],"problems,":[237],"either":[238],"best,":[240,245],"or":[241],"technique.":[247],"apply":[249],"insights":[251],"obtained":[252],"experimental":[259],"design":[262],"effective":[263],"algorithms":[264],"interval":[267,278],"data.":[268],"show":[270],"how":[271],"basic":[274],"methods":[275,318],"data,":[279],"DMAP":[280,325],"fast":[282,336],"range-summation,":[283],"can":[284,305],"improved":[286,317],"significantly":[287],"respect":[289],"update":[292,303,329],"time":[293,304,330],"without":[294],"loss":[297],"accuracy.":[299,342],"gain":[301],"as":[307,309],"large":[308],"magnitude,":[313],"thus":[314],"making":[315],"practical.":[319],"suggests":[323],"preferable":[327],"when":[328],"critical":[333],"requirement":[334],"range-summation":[337],"desirable":[339]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
