{"id":"https://openalex.org/W3185475042","doi":"https://doi.org/10.14778/3476249.3476285","title":"Accelerating approximate aggregation queries with expensive predicates","display_name":"Accelerating approximate aggregation queries with expensive predicates","publication_year":2021,"publication_date":"2021-07-01","ids":{"openalex":"https://openalex.org/W3185475042","doi":"https://doi.org/10.14778/3476249.3476285","mag":"3185475042"},"language":"en","primary_location":{"id":"doi:10.14778/3476249.3476285","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3476249.3476285","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2108.06313","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Daniel Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daniel Kang","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":null,"display_name":"John Guibas","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Guibas","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Peter Bailis","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter Bailis","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tatsunori Hashimoto","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tatsunori Hashimoto","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yi Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yi Sun","raw_affiliation_strings":["University of Chicago"],"affiliations":[{"raw_affiliation_string":"University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":null,"display_name":"Matei Zaharia","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matei Zaharia","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":1.3851,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.813838,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"14","issue":"11","first_page":"2341","last_page":"2354"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.3028999865055084,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.3028999865055084,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.2029999941587448,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.09839999675750732,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/predicate","display_name":"Predicate (mathematical logic)","score":0.4733999967575073},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4551999866962433},{"id":"https://openalex.org/keywords/plug-in","display_name":"Plug-in","score":0.4510999917984009},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.42829999327659607},{"id":"https://openalex.org/keywords/proxy","display_name":"Proxy (statistics)","score":0.41040000319480896},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.38499999046325684},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.3273000121116638}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8169000148773193},{"id":"https://openalex.org/C140146324","wikidata":"https://www.wikidata.org/wiki/Q1144319","display_name":"Predicate (mathematical logic)","level":2,"score":0.4733999967575073},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.45980000495910645},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4551999866962433},{"id":"https://openalex.org/C4924752","wikidata":"https://www.wikidata.org/wiki/Q184148","display_name":"Plug-in","level":2,"score":0.4510999917984009},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.42829999327659607},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.41040000319480896},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.38499999046325684},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3571000099182129},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.3255000114440918},{"id":"https://openalex.org/C82578977","wikidata":"https://www.wikidata.org/wiki/Q16773055","display_name":"Data aggregator","level":3,"score":0.3240000009536743},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C148764684","wikidata":"https://www.wikidata.org/wiki/Q621751","display_name":"Approximation algorithm","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C3018263672","wikidata":"https://www.wikidata.org/wiki/Q1296251","display_name":"Efficient algorithm","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.28540000319480896},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.26080000400543213}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.14778/3476249.3476285","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3476249.3476285","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2108.06313","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2108.06313","pdf_url":"https://arxiv.org/pdf/2108.06313","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2108.06313","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2108.06313","pdf_url":"https://arxiv.org/pdf/2108.06313","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W22913383","https://openalex.org/W1834627138","https://openalex.org/W1979603710","https://openalex.org/W1996838895","https://openalex.org/W1998783890","https://openalex.org/W2012254817","https://openalex.org/W2017011036","https://openalex.org/W2065756054","https://openalex.org/W2071989194","https://openalex.org/W2097044086","https://openalex.org/W2099813784","https://openalex.org/W2106834095","https://openalex.org/W2112976607","https://openalex.org/W2118382926","https://openalex.org/W2132823934","https://openalex.org/W2152029707","https://openalex.org/W2297046696","https://openalex.org/W2341528187","https://openalex.org/W2752236330","https://openalex.org/W2786278116","https://openalex.org/W2803764703","https://openalex.org/W2887117815","https://openalex.org/W2891345706","https://openalex.org/W2912981821","https://openalex.org/W2963163009","https://openalex.org/W2963510045","https://openalex.org/W2971196067","https://openalex.org/W2998032620","https://openalex.org/W2998752879","https://openalex.org/W3000318171","https://openalex.org/W3027484068","https://openalex.org/W3031176864","https://openalex.org/W3031844875","https://openalex.org/W3081937877","https://openalex.org/W3082972076","https://openalex.org/W3106772683","https://openalex.org/W3109496323","https://openalex.org/W3167312943","https://openalex.org/W3185475042","https://openalex.org/W4214894679","https://openalex.org/W4246006899","https://openalex.org/W4251617391","https://openalex.org/W4288086218"],"related_works":[],"abstract_inverted_index":{"Researchers":[0],"and":[1,32,99,160],"industry":[2],"analysts":[3,40],"are":[4,19,30,41,59,70,147],"increasingly":[5],"interested":[6,42],"in":[7,43,179],"computing":[8],"aggregation":[9,86,92],"queries":[10,46,58,87,93],"over":[11],"large,":[12],"unstructured":[13],"datasets":[14],"with":[15,88,94,186],"selective":[16],"predicates":[17,69],"that":[18,105,116,121,142,172,188,197],"computed":[20],"using":[21,78],"expensive":[22,31,95],"deep":[23],"neural":[24],"networks":[25],"(DNNs).":[26],"As":[27],"these":[28,45,154],"DNNs":[29],"because":[33,62],"many":[34],"applications":[35],"can":[36],"tolerate":[37],"approximate":[38,51],"answers,":[39],"accelerating":[44],"via":[47],"approximations.":[48],"Unfortunately,":[49],"standard":[50],"query":[52,102],"processing":[53,103],"techniques":[54],"to":[55,136,163,166,211],"accelerate":[56,91],"such":[57],"not":[60,84,123,190],"applicable":[61],"they":[63],"assume":[64],"the":[65,68,113,125,134,145,167,192],"result":[66],"of":[67,73,183],"available":[71],"ahead":[72],"time.":[74],"Furthermore,":[75],"recent":[76],"work":[77],"cheap":[79],"approximations":[80],"(i.e.,":[81],"proxies)":[82],"do":[83,122],"support":[85],"predicates.":[89],"To":[90,127],"predicates,":[96],"we":[97,131],"develop":[98],"analyze":[100],"a":[101,180],"algorithm":[104],"leverages":[106],"proxies":[107],"(ABAE).":[108],"ABAE":[109,156,173,198],"must":[110],"account":[111],"for":[112],"key":[114],"challenge":[115],"it":[117],"may":[118,189],"sample":[119,164],"records":[120,138,143],"satisfy":[124,191],"predicate.":[126,193],"address":[128],"this":[129],"challenge,":[130],"first":[132],"use":[133],"proxy":[135],"group":[137],"into":[139,150],"strata":[140],"so":[141],"satisfying":[144],"predicate":[146],"ideally":[148],"grouped":[149],"few":[151],"strata.":[152],"Given":[153],"strata,":[155],"uses":[157],"pilot":[158],"sampling":[159,185],"plugin":[161],"estimates":[162],"according":[165],"optimal":[168,177],"allocation.":[169],"We":[170,194],"show":[171,196],"converges":[174],"at":[175],"an":[176],"rate":[178],"novel":[181],"analysis":[182],"stratified":[184],"draws":[187],"further":[195],"outperforms":[199],"on":[200,202],"baselines":[201],"six":[203],"real-world":[204],"datasets,":[205],"reducing":[206],"labeling":[207],"costs":[208],"by":[209],"up":[210],"2.3X.":[212]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2021-08-02T00:00:00"}
