{"id":"https://openalex.org/W2982284108","doi":"https://doi.org/10.1109/tkde.2020.3006446","title":"Exploiting Data Skew for Improved Query Performance","display_name":"Exploiting Data Skew for Improved Query Performance","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W2982284108","doi":"https://doi.org/10.1109/tkde.2020.3006446","mag":"2982284108"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2020.3006446","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2020.3006446","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1910.10063","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wangda Zhang","orcid":"https://orcid.org/0000-0002-4965-8132"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wangda Zhang","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"raw_orcid":"https://orcid.org/0000-0002-4965-8132","affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":null,"display_name":"Kenneth A. Ross","orcid":"https://orcid.org/0000-0001-9397-6990"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kenneth A. Ross","raw_affiliation_strings":["Columbia University, New York, NY, USA"],"raw_orcid":"https://orcid.org/0000-0001-9397-6990","affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":0.9712,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.76964354,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"34","issue":"5","first_page":"2176","last_page":"2189"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.31380000710487366,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.31380000710487366,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.28450000286102295,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.2371000051498413,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.8064000010490417},{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.63919997215271},{"id":"https://openalex.org/keywords/zipfs-law","display_name":"Zipf's law","score":0.5896999835968018},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.5827000141143799},{"id":"https://openalex.org/keywords/cache-invalidation","display_name":"Cache invalidation","score":0.47119998931884766},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.4496999979019165},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3970000147819519},{"id":"https://openalex.org/keywords/smart-cache","display_name":"Smart Cache","score":0.38530001044273376},{"id":"https://openalex.org/keywords/cache-oblivious-algorithm","display_name":"Cache-oblivious algorithm","score":0.3434999883174896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8974000215530396},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.8064000010490417},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.63919997215271},{"id":"https://openalex.org/C125932096","wikidata":"https://www.wikidata.org/wiki/Q205472","display_name":"Zipf's law","level":2,"score":0.5896999835968018},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.5827000141143799},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4731000065803528},{"id":"https://openalex.org/C25536678","wikidata":"https://www.wikidata.org/wiki/Q5015977","display_name":"Cache invalidation","level":5,"score":0.47119998931884766},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.4496999979019165},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3970000147819519},{"id":"https://openalex.org/C167713795","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"Smart Cache","level":5,"score":0.38530001044273376},{"id":"https://openalex.org/C59687516","wikidata":"https://www.wikidata.org/wiki/Q5015938","display_name":"Cache-oblivious algorithm","level":5,"score":0.3434999883174896},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.3280999958515167},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.32339999079704285},{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3181000053882599},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C36340418","wikidata":"https://www.wikidata.org/wiki/Q7124288","display_name":"Page cache","level":5,"score":0.3100999891757965},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.30970001220703125},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.296999990940094},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C136736807","wikidata":"https://www.wikidata.org/wiki/Q818943","display_name":"Range query (database)","level":5,"score":0.2759000062942505},{"id":"https://openalex.org/C198352243","wikidata":"https://www.wikidata.org/wiki/Q37105","display_name":"Line (geometry)","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C59276292","wikidata":"https://www.wikidata.org/wiki/Q580427","display_name":"Database index","level":3,"score":0.27390000224113464},{"id":"https://openalex.org/C172722865","wikidata":"https://www.wikidata.org/wiki/Q2302053","display_name":"Spatial query","level":5,"score":0.26969999074935913},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2637999951839447},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.25589999556541443},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.25540000200271606}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tkde.2020.3006446","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2020.3006446","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1910.10063","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.10063","pdf_url":"https://arxiv.org/pdf/1910.10063","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1910.10063","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.10063","pdf_url":"https://arxiv.org/pdf/1910.10063","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W44122813","https://openalex.org/W1194153933","https://openalex.org/W1791587242","https://openalex.org/W1916377347","https://openalex.org/W1967601791","https://openalex.org/W1969877208","https://openalex.org/W1987238352","https://openalex.org/W1987699222","https://openalex.org/W1991223838","https://openalex.org/W1999883293","https://openalex.org/W2000042664","https://openalex.org/W2006552857","https://openalex.org/W2011129635","https://openalex.org/W2013042849","https://openalex.org/W2014977566","https://openalex.org/W2029866183","https://openalex.org/W2029891792","https://openalex.org/W2072541977","https://openalex.org/W2080723374","https://openalex.org/W2082695854","https://openalex.org/W2086977914","https://openalex.org/W2088856850","https://openalex.org/W2096496252","https://openalex.org/W2102729946","https://openalex.org/W2103670492","https://openalex.org/W2104003087","https://openalex.org/W2105079611","https://openalex.org/W2106771621","https://openalex.org/W2110445267","https://openalex.org/W2117546628","https://openalex.org/W2125529470","https://openalex.org/W2132846504","https://openalex.org/W2138520521","https://openalex.org/W2140509629","https://openalex.org/W2144839430","https://openalex.org/W2147076738","https://openalex.org/W2150630976","https://openalex.org/W2155070484","https://openalex.org/W2156000708","https://openalex.org/W2166955231","https://openalex.org/W2183162925","https://openalex.org/W2328769609","https://openalex.org/W2340076492","https://openalex.org/W2406955896","https://openalex.org/W2439390339","https://openalex.org/W2440477515","https://openalex.org/W2612094043","https://openalex.org/W2765206444","https://openalex.org/W2795326976","https://openalex.org/W2798416929","https://openalex.org/W2798926543","https://openalex.org/W2950627632","https://openalex.org/W3141434431","https://openalex.org/W4244768633","https://openalex.org/W4245765815","https://openalex.org/W4292080258","https://openalex.org/W6635650928","https://openalex.org/W6636459745","https://openalex.org/W6641330010","https://openalex.org/W6675558867","https://openalex.org/W6677064796","https://openalex.org/W6678708722","https://openalex.org/W6713466670","https://openalex.org/W6733403177","https://openalex.org/W6744271739","https://openalex.org/W6760989303"],"related_works":[],"abstract_inverted_index":{"Analytic":[0],"queries":[1,201],"enable":[2],"sophisticated":[3],"large-scale":[4],"data":[5,75,115,128,182],"analysis":[6],"within":[7],"many":[8],"commercial,":[9],"scientific":[10],"and":[11,56,146,163,180],"medical":[12],"domains":[13],"today.":[14],"Data":[15],"skew":[16,186],"is":[17,142],"a":[18,26,39,45,49,57,64,93,104,113,122,155],"ubiquitous":[19],"feature":[20],"of":[21,52,60,86,107,149,173,206],"these":[22],"real-world":[23],"domains.":[24],"In":[25,38,63,91,117,193],"retail":[27],"database,":[28,41,66],"some":[29,67,194],"products":[30],"are":[31,168],"typically":[32],"much":[33,70],"more":[34],"popular":[35,114,132],"than":[36,77],"others.":[37,78],"text":[40],"word":[42],"frequencies":[43],"follow":[44],"Zipf":[46],"distribution":[47],"with":[48],"small":[50,105],"number":[51],"very":[53],"common":[54],"words,":[55],"long":[58],"tail":[59],"infrequent":[61],"words.":[62],"geographic":[65],"regions":[68],"have":[69],"higher":[71],"populations":[72],"(and":[73],"therefore":[74],"measurements)":[76],"Current":[79],"systems":[80],"do":[81],"not":[82],"make":[83],"the":[84,108,135,160,171],"most":[85],"caches":[87],"for":[88,126,158],"exploiting":[89,185],"skew.":[90,174],"particular,":[92],"whole":[94],"cache":[95,99,109,137,151,161],"line":[96,110],"may":[97],"remain":[98],"resident":[100],"even":[101],"though":[102],"only":[103],"part":[106],"corresponds":[111],"to":[112,130],"item.":[116],"this":[118],"article,":[119],"we":[120],"propose":[121],"novel":[123],"index":[124],"structure":[125],"repositioning":[127],"items":[129,133],"concentrate":[131],"into":[134],"same":[136],"lines.":[138],"The":[139],"net":[140],"result":[141],"better":[143,147],"spatial":[144],"locality,":[145],"utilization":[148],"limited":[150],"resources.":[152],"We":[153],"develop":[154],"theoretical":[156],"model":[157],"analyzing":[159],"utilization,":[162],"implement":[164],"database":[165],"operators":[166],"that":[167,184],"efficient":[169],"in":[170],"presence":[172],"Our":[175],"experimental":[176],"evaluation":[177],"on":[178],"real":[179],"synthetic":[181],"shows":[183],"can":[187,198],"significantly":[188],"improve":[189],"in-memory":[190],"query":[191],"performance.":[192],"cases,":[195],"our":[196],"techniques":[197],"speed":[199],"up":[200],"by":[202],"over":[203],"an":[204],"order":[205],"magnitude.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2019-11-01T00:00:00"}
