{"id":"https://openalex.org/W3173588310","doi":"https://doi.org/10.1145/3448016.3452821","title":"Weighted Distinct Sampling: Cardinality Estimation for SPJ Queries","display_name":"Weighted Distinct Sampling: Cardinality Estimation for SPJ Queries","publication_year":2021,"publication_date":"2021-06-09","ids":{"openalex":"https://openalex.org/W3173588310","doi":"https://doi.org/10.1145/3448016.3452821","mag":"3173588310"},"language":"en","primary_location":{"id":"doi:10.1145/3448016.3452821","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448016.3452821","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072101631","display_name":"Yuan Qiu","orcid":"https://orcid.org/0000-0002-3488-6386"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Yuan Qiu","raw_affiliation_strings":["Hong Kong University of Science and Technology, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100781536","display_name":"Yilei Wang","orcid":"https://orcid.org/0000-0002-7856-2527"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yilei Wang","raw_affiliation_strings":["Hong Kong University of Science and Technology, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009196125","display_name":"Ke Yi","orcid":"https://orcid.org/0000-0002-2178-3716"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ke Yi","raw_affiliation_strings":["Hong Kong University of Science and Technology &amp; SICS, Shenzhen University, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology &amp; SICS, Shenzhen University, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100450470","display_name":"Feifei Li","orcid":"https://orcid.org/0009-0003-0770-5775"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feifei Li","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007360737","display_name":"Bin Wu","orcid":"https://orcid.org/0000-0002-4743-1006"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Wu","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088363259","display_name":"Chaoqun Zhan","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaoqun Zhan","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5072101631"],"corresponding_institution_ids":["https://openalex.org/I200769079"],"apc_list":null,"apc_paid":null,"fwci":0.7641,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.72823849,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1465","last_page":"1477"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10908","display_name":"Analytical Chemistry and Chromatography","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cardinality","display_name":"Cardinality (data modeling)","score":0.8607969284057617},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7799526453018188},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.6886659860610962},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.6270452737808228},{"id":"https://openalex.org/keywords/join","display_name":"Join (topology)","score":0.5852564573287964},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5723171234130859},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.5716007947921753},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5597185492515564},{"id":"https://openalex.org/keywords/conjunctive-query","display_name":"Conjunctive query","score":0.492721825838089},{"id":"https://openalex.org/keywords/relational-database-management-system","display_name":"Relational database management system","score":0.4588111340999603},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4586056172847748},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4455130398273468},{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.42155787348747253},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3529778718948364},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3327784240245819},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3106735646724701},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1618732511997223},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.11957550048828125},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11127173900604248}],"concepts":[{"id":"https://openalex.org/C87117476","wikidata":"https://www.wikidata.org/wiki/Q362383","display_name":"Cardinality (data modeling)","level":2,"score":0.8607969284057617},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7799526453018188},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.6886659860610962},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.6270452737808228},{"id":"https://openalex.org/C2776124973","wikidata":"https://www.wikidata.org/wiki/Q3183033","display_name":"Join (topology)","level":2,"score":0.5852564573287964},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5723171234130859},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.5716007947921753},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5597185492515564},{"id":"https://openalex.org/C65647387","wikidata":"https://www.wikidata.org/wiki/Q1781706","display_name":"Conjunctive query","level":3,"score":0.492721825838089},{"id":"https://openalex.org/C24394798","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database management system","level":3,"score":0.4588111340999603},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4586056172847748},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4455130398273468},{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.42155787348747253},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3529778718948364},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3327784240245819},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3106735646724701},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1618732511997223},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.11957550048828125},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11127173900604248},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3448016.3452821","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448016.3452821","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-111773","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-111773","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"},{"id":"pmh:oai:repository.ust.hk:1783.1-111773","is_oa":false,"landing_page_url":"http://gateway.isiknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=LinksAMR&SrcApp=PARTNER_APP&DestLinkType=FullRecord&DestApp=WOS&KeyUT=000747673800116","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W3397180","https://openalex.org/W1558832481","https://openalex.org/W1569403765","https://openalex.org/W1766932551","https://openalex.org/W1785933978","https://openalex.org/W1822348499","https://openalex.org/W1992023276","https://openalex.org/W1992363839","https://openalex.org/W2002791618","https://openalex.org/W2002919217","https://openalex.org/W2020584928","https://openalex.org/W2021850646","https://openalex.org/W2025051251","https://openalex.org/W2047424291","https://openalex.org/W2053075747","https://openalex.org/W2057058417","https://openalex.org/W2073479529","https://openalex.org/W2080745194","https://openalex.org/W2090403603","https://openalex.org/W2107443258","https://openalex.org/W2112452856","https://openalex.org/W2118229812","https://openalex.org/W2128227281","https://openalex.org/W2134786002","https://openalex.org/W2144982963","https://openalex.org/W2148706674","https://openalex.org/W2153406069","https://openalex.org/W2171903035","https://openalex.org/W2243803726","https://openalex.org/W2296677182","https://openalex.org/W2340222647","https://openalex.org/W2421547754","https://openalex.org/W2472603468","https://openalex.org/W2585835859","https://openalex.org/W2612048434","https://openalex.org/W2613577383","https://openalex.org/W2911324315","https://openalex.org/W2948371369","https://openalex.org/W2948827545","https://openalex.org/W2963554098","https://openalex.org/W2971076479","https://openalex.org/W3100923768","https://openalex.org/W3135209803","https://openalex.org/W3173588310","https://openalex.org/W4210596830","https://openalex.org/W4230435573","https://openalex.org/W4241185933"],"related_works":["https://openalex.org/W2378924333","https://openalex.org/W2362446711","https://openalex.org/W1495801388","https://openalex.org/W2551308855","https://openalex.org/W1760549314","https://openalex.org/W2098812463","https://openalex.org/W4247095193","https://openalex.org/W1901149804","https://openalex.org/W1855322793","https://openalex.org/W4297453533"],"abstract_inverted_index":{"SPJ":[0],"(select-project-join)":[1],"queries":[2,9,18],"form":[3],"the":[4,44,47,54,74,93],"backbone":[5],"of":[6,16,53],"many":[7],"SQL":[8],"used":[10,70],"in":[11,26,43,71,73],"practice.":[12],"Accurate":[13],"cardinality":[14,50,101],"estimation":[15,51],"these":[17],"is":[19,82],"thus":[20],"an":[21],"important":[22],"problem,":[23],"with":[24,92],"applications":[25],"query":[27,30],"optimization,":[28],"approximate":[29],"processing,":[31],"and":[32,60],"data":[33],"analytics.":[34],"However,":[35],"this":[36],"problem":[37],"has":[38],"not":[39,68],"been":[40,64],"rigorously":[41],"addressed":[42],"literature,":[45],"despite":[46],"fact":[48],"that":[49,83],"techniques":[52],"three":[55],"relational":[56],"operators,":[57],"selection,":[58],"projection,":[59],"join,":[61],"have":[62],"each":[63],"extensively":[65],"studied":[66],"(but":[67],"when":[69,97],"combination)":[72],"past":[75],"30+":[76],"years.":[77],"The":[78],"major":[79],"technical":[80],"difficulty":[81],"(distinct)":[84],"projection":[85],"seems":[86],"to":[87,90,100],"be":[88],"difficult":[89],"combine":[91],"other":[94],"two":[95],"operators":[96],"it":[98],"comes":[99],"estimation.":[102]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
