{"id":"https://openalex.org/W2111806841","doi":"https://doi.org/10.14778/1453856.1453884","title":"Tighter estimation using bottom k sketches","display_name":"Tighter estimation using bottom k sketches","publication_year":2008,"publication_date":"2008-08-01","ids":{"openalex":"https://openalex.org/W2111806841","doi":"https://doi.org/10.14778/1453856.1453884","mag":"2111806841"},"language":"en","primary_location":{"id":"doi:10.14778/1453856.1453884","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1453856.1453884","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026385549","display_name":"Edith Cohen","orcid":"https://orcid.org/0000-0002-3926-8237"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Edith Cohen","raw_affiliation_strings":["AT&amp;T Labs-Research, Florham Park, NJ"],"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs-Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006699796","display_name":"Haim Kaplan","orcid":"https://orcid.org/0000-0001-9586-8002"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Haim Kaplan","raw_affiliation_strings":["Tel Aviv University, Tel Aviv, Israel","Tel Aviv University, Tel Aviv, Israel,"],"affiliations":[{"raw_affiliation_string":"Tel Aviv University, Tel Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]},{"raw_affiliation_string":"Tel Aviv University, Tel Aviv, Israel,","institution_ids":["https://openalex.org/I16391192"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5026385549"],"corresponding_institution_ids":["https://openalex.org/I1283103587"],"apc_list":null,"apc_paid":null,"fwci":6.4572,"has_fulltext":false,"cited_by_count":81,"citation_normalized_percentile":{"value":0.97022801,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"1","issue":"1","first_page":"213","last_page":"224"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.8820357322692871},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.8410431146621704},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6423702239990234},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5287632346153259},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4936738908290863},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4526998996734619},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.43568557500839233},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4210675060749054},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.41450974345207214},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.3898876905441284},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.28979218006134033},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19329038262367249},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.09906899929046631}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.8820357322692871},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.8410431146621704},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6423702239990234},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5287632346153259},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4936738908290863},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4526998996734619},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.43568557500839233},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4210675060749054},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41450974345207214},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3898876905441284},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28979218006134033},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19329038262367249},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.09906899929046631},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.14778/1453856.1453884","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1453856.1453884","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.140.7808","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.140.7808","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.vldb.org/pvldb/1/1453884.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.146.209","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.146.209","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.math.tau.ac.il/~haimk/papers/samplenr.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W579397108","https://openalex.org/W1562712103","https://openalex.org/W1588537298","https://openalex.org/W1777225603","https://openalex.org/W1965996575","https://openalex.org/W1972076792","https://openalex.org/W1979819093","https://openalex.org/W1982682305","https://openalex.org/W1984566373","https://openalex.org/W1992363839","https://openalex.org/W1993482412","https://openalex.org/W1993865637","https://openalex.org/W1997438973","https://openalex.org/W2001947543","https://openalex.org/W2020434732","https://openalex.org/W2027689065","https://openalex.org/W2028831670","https://openalex.org/W2036304306","https://openalex.org/W2041185818","https://openalex.org/W2048779798","https://openalex.org/W2081903609","https://openalex.org/W2085845250","https://openalex.org/W2087982439","https://openalex.org/W2097332592","https://openalex.org/W2119098504","https://openalex.org/W2126356486","https://openalex.org/W2132069633","https://openalex.org/W2137885048","https://openalex.org/W2143085379","https://openalex.org/W2148885851","https://openalex.org/W2152092514","https://openalex.org/W2152617961","https://openalex.org/W2163288162","https://openalex.org/W2341535507","https://openalex.org/W4233471163","https://openalex.org/W4243255773","https://openalex.org/W6635223630","https://openalex.org/W6679663036","https://openalex.org/W7067760825"],"related_works":["https://openalex.org/W2366403280","https://openalex.org/W1495108544","https://openalex.org/W3148229873","https://openalex.org/W2150160875","https://openalex.org/W4242223894","https://openalex.org/W2091301346","https://openalex.org/W1517524280","https://openalex.org/W4389760904","https://openalex.org/W4306886878","https://openalex.org/W4323520239"],"abstract_inverted_index":{"Summaries":[0],"of":[1,19,24,40,63,100,115,122,142,152,177,187,198,211],"massive":[2],"data":[3,65,70,143,181],"sets":[4],"support":[5,73],"approximate":[6],"query":[7],"processing":[8],"over":[9,16,30],"the":[10,22,49,64,97,101,108,126,136,153,188,199],"original":[11],"data.":[12],"A":[13],"basic":[14],"aggregate":[15],"a":[17,28,36,112,175],"set":[18,103],"records":[20],"is":[21,94,133,139,157],"weight":[23,99,138],"subpopulations":[25,193],"specified":[26],"as":[27,119],"predicate":[29],"records'":[31],"attributes.":[32],"Bottom-k":[33],"sketches":[34,121],"are":[35,147],"powerful":[37],"summarization":[38,109],"format":[39],"weighted":[41,51,206],"items":[42],"that":[43,132],"includes":[44],"priority":[45],"sampling":[46,52,207],"[22],":[47],"and":[48,69,72,75,83,125,170,179,210],"classic":[50],"without":[53,111,208],"replacement.":[54],"They":[55],"can":[56],"be":[57,105],"computed":[58,106],"efficiently":[59],"for":[60,86,120,205],"many":[61],"representations":[62],"including":[66],"distributed":[67],"databases":[68],"streams":[71],"coordinated":[74],"all-distances":[76],"sketches.":[77],"We":[78,165,183],"derive":[79],"novel":[80],"unbiased":[81],"estimators":[82,146,204],"confidence":[84,213],"bounds":[85,214],"subpopulation":[87],"weight.":[88],"Our":[89,145],"rank":[90],"conditioning":[91,129],"(RC)":[92],"estimator":[93,131,155,190,201],"applicable":[95,134,160],"when":[96,135],"total":[98,137],"sketched":[102],"cannot":[104],"by":[107],"algorithm":[110],"significant":[113],"use":[114],"additional":[116],"resources":[117],"(such":[118],"network":[123],"neighborhoods)":[124],"tighter":[127],"subset":[128],"(SC)":[130],"available":[140],"(sketches":[141],"streams).":[144],"derived":[148],"using":[149,174],"clever":[150],"applications":[151],"Horvitz-Thompson":[154],"(that":[156],"not":[158],"directly":[159],"to":[161],"bottom-":[162],"k":[163],"sketches).":[164],"develop":[166],"efficient":[167],"computational":[168],"methods":[169],"conduct":[171],"performance":[172],"evaluation":[173],"range":[176],"synthetic":[178],"real":[180],"sets.":[182],"demonstrate":[184],"considerable":[185],"benefits":[186],"SC":[189],"on":[191],"larger":[192],"(over":[194,202,215],"all":[195,216],"other":[196],"estimators);":[197],"RC":[200],"existing":[203],"replacement);":[209],"our":[212],"previous":[217],"approaches).":[218]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":8},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":5}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
