{"id":"https://openalex.org/W4392453369","doi":"https://doi.org/10.14778/3636218.3636222","title":"Cache-Efficient Top-k Aggregation over High Cardinality Large Datasets","display_name":"Cache-Efficient Top-k Aggregation over High Cardinality Large Datasets","publication_year":2023,"publication_date":"2023-12-01","ids":{"openalex":"https://openalex.org/W4392453369","doi":"https://doi.org/10.14778/3636218.3636222"},"language":"en","primary_location":{"id":"doi:10.14778/3636218.3636222","is_oa":false,"landing_page_url":"http://dx.doi.org/10.14778/3636218.3636222","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079891721","display_name":"Tarique Siddiqui","orcid":"https://orcid.org/0009-0002-0866-7275"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tarique Siddiqui","raw_affiliation_strings":["Microsoft Research, Redmond, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, Washington, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063257827","display_name":"Vivek Narasayya","orcid":"https://orcid.org/0000-0001-7011-7886"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vivek Narasayya","raw_affiliation_strings":["Microsoft Research, Redmond, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, Washington, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109571500","display_name":"Marius Dumitru","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marius Dumitru","raw_affiliation_strings":["Microsoft, Redmond, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, Washington, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038037154","display_name":"Surajit Chaudhuri","orcid":"https://orcid.org/0000-0001-8252-5270"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Surajit Chaudhuri","raw_affiliation_strings":["Microsoft Research, Redmond, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, Washington, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5079891721"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.22810642,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"17","issue":"4","first_page":"644","last_page":"656"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.7559073567390442},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7094051837921143},{"id":"https://openalex.org/keywords/cardinality","display_name":"Cardinality (data modeling)","score":0.6779301762580872},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.610565721988678},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.43518924713134766},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.4160756468772888},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.23625081777572632}],"concepts":[{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.7559073567390442},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7094051837921143},{"id":"https://openalex.org/C87117476","wikidata":"https://www.wikidata.org/wiki/Q362383","display_name":"Cardinality (data modeling)","level":2,"score":0.6779301762580872},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.610565721988678},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.43518924713134766},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.4160756468772888},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.23625081777572632}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3636218.3636222","is_oa":false,"landing_page_url":"http://dx.doi.org/10.14778/3636218.3636222","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W139562302","https://openalex.org/W1656389077","https://openalex.org/W1970779762","https://openalex.org/W1992730709","https://openalex.org/W1995302096","https://openalex.org/W2006552857","https://openalex.org/W2009688537","https://openalex.org/W2021651899","https://openalex.org/W2025051251","https://openalex.org/W2029866183","https://openalex.org/W2043934800","https://openalex.org/W2067089961","https://openalex.org/W2080234606","https://openalex.org/W2113139394","https://openalex.org/W2122048769","https://openalex.org/W2124851765","https://openalex.org/W2125529470","https://openalex.org/W2236871065","https://openalex.org/W2292693431","https://openalex.org/W2296677182","https://openalex.org/W2406955896","https://openalex.org/W2790634852","https://openalex.org/W2798445803","https://openalex.org/W4239354938","https://openalex.org/W4248949446","https://openalex.org/W4297970464"],"related_works":["https://openalex.org/W2147122795","https://openalex.org/W2061075966","https://openalex.org/W3147501184","https://openalex.org/W2167303720","https://openalex.org/W2109715593","https://openalex.org/W3161817247","https://openalex.org/W1918294866","https://openalex.org/W2012518269","https://openalex.org/W1863436361","https://openalex.org/W2401390283"],"abstract_inverted_index":{"Top-k":[0],"aggregation":[1,83,189,211],"queries":[2,21],"are":[3,22,127],"widely":[4],"used":[5],"in":[6,89],"data":[7,70,91,95,103,148],"analytics":[8],"for":[9,30,50,121,159,187,203],"summarizing":[10],"and":[11,33,105,117,133,150,172,201],"identifying":[12],"important":[13],"groups":[14,32,37,114,126],"from":[15],"large":[16,52],"amounts":[17],"of":[18,64,183,194],"data.":[19],"These":[20],"usually":[23],"processed":[24],"by":[25,100],"first":[26],"computing":[27],"exact":[28,119,138],"aggregates":[29],"all":[31],"then":[34],"selecting":[35],"the":[36,39,61,87,90,153],"with":[38,208],"top-k":[40,163],"aggregate":[41,164],"values.":[42],"However,":[43],"such":[44],"an":[45,106],"approach":[46],"can":[47,178],"be":[48],"inefficient":[49],"high-cardinality":[51],"datasets":[53,174],"where":[54],"intermediate":[55],"results":[56],"may":[57],"not":[58],"fit":[59],"within":[60],"local":[62],"cache":[63],"multi-core":[65],"processors":[66],"leading":[67],"to":[68,93,143,155,199],"excessive":[69],"movement.":[71],"To":[72],"address":[73],"this":[74],"problem,":[75],"we":[76],"have":[77,151],"developed":[78],"Zippy,":[79],"a":[80,180],"new":[81],"cache-conscious":[82,210],"framework":[84,154],"that":[85,110,176],"leverages":[86],"skew":[88],"distribution":[92],"minimize":[94],"movements.":[96],"This":[97],"is":[98],"achieved":[99],"designing":[101],"cache-resident":[102],"structures":[104],"adaptive":[107],"multi-pass":[108],"algorithm":[109],"quickly":[111],"identifies":[112],"candidate":[113],"during":[115],"processing,":[116],"performs":[118],"aggregations":[120],"these":[122],"groups.":[123],"The":[124],"non-candidate":[125],"pruned":[128],"cheaply":[129],"using":[130,169],"efficient":[131],"hashing":[132],"partitioning":[134],"techniques":[135,142],"without":[136],"performing":[137],"aggregations.":[139],"We":[140],"develop":[141],"improve":[144],"robustness":[145],"over":[146],"adversarial":[147],"distributions":[149],"optimized":[152],"reuse":[156],"computations":[157],"incrementally":[158],"rolling":[160],"(or":[161],"paginated)":[162],"queries.":[165],"Our":[166],"extensive":[167],"evaluation":[168],"both":[170],"real-world":[171],"synthetic":[173],"demonstrate":[175],"Zippy":[177],"achieve":[179],"median":[181],"speed-up":[182],"more":[184],"than":[185],"3\u00d7":[186],"monotonic":[188],"functions":[190,205],"across":[191],"typical":[192],"ranges":[193],"k":[195],"values":[196],"(e.g.,":[197],"1":[198],"100)":[200],"1.4\u00d7":[202],"non-monotonic":[204],"when":[206],"compared":[207],"state-of-the-art":[209],"techniques.":[212]},"counts_by_year":[],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
