{"id":"https://openalex.org/W4402042983","doi":"https://doi.org/10.14778/3681954.3682013","title":"Partition, Don't Sort! Compression Boosters for Cloud Data Ingestion Pipelines","display_name":"Partition, Don't Sort! Compression Boosters for Cloud Data Ingestion Pipelines","publication_year":2024,"publication_date":"2024-07-01","ids":{"openalex":"https://openalex.org/W4402042983","doi":"https://doi.org/10.14778/3681954.3682013"},"language":"en","primary_location":{"id":"doi:10.14778/3681954.3682013","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3681954.3682013","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039935651","display_name":"Patrick Hansert","orcid":"https://orcid.org/0000-0002-7592-3542"},"institutions":[{"id":"https://openalex.org/I4387152675","display_name":"Rheinland-Pf\u00e4lzische Technische Universit\u00e4t Kaiserslautern-Landau","ror":"https://ror.org/01qrts582","country_code":null,"type":"education","lineage":["https://openalex.org/I4387152675"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Patrick Hansert","raw_affiliation_strings":["RPTU Kaiserslautern-Landau, Kaiserslautern, Germany"],"affiliations":[{"raw_affiliation_string":"RPTU Kaiserslautern-Landau, Kaiserslautern, Germany","institution_ids":["https://openalex.org/I4387152675"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076474375","display_name":"Sebastian Michel","orcid":"https://orcid.org/0000-0002-2238-0185"},"institutions":[{"id":"https://openalex.org/I4387152675","display_name":"Rheinland-Pf\u00e4lzische Technische Universit\u00e4t Kaiserslautern-Landau","ror":"https://ror.org/01qrts582","country_code":null,"type":"education","lineage":["https://openalex.org/I4387152675"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sebastian Michel","raw_affiliation_strings":["RPTU Kaiserslautern-Landau, Kaiserslautern, Germany"],"affiliations":[{"raw_affiliation_string":"RPTU Kaiserslautern-Landau, Kaiserslautern, Germany","institution_ids":["https://openalex.org/I4387152675"]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5039935651"],"corresponding_institution_ids":["https://openalex.org/I4387152675"],"apc_list":null,"apc_paid":null,"fwci":0.3663,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60722104,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"17","issue":"11","first_page":"3456","last_page":"3469"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sort","display_name":"sort","score":0.7538152933120728},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5935144424438477},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.49248775839805603},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4145374894142151},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.19915279746055603},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16004565358161926},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.08880165219306946},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08085557818412781}],"concepts":[{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.7538152933120728},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5935144424438477},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.49248775839805603},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4145374894142151},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.19915279746055603},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16004565358161926},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.08880165219306946},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08085557818412781}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3681954.3682013","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3681954.3682013","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1497953515","https://openalex.org/W1533886078","https://openalex.org/W1547206386","https://openalex.org/W1780185704","https://openalex.org/W1964691382","https://openalex.org/W1978010829","https://openalex.org/W1982092405","https://openalex.org/W1986055680","https://openalex.org/W1994211684","https://openalex.org/W1995747699","https://openalex.org/W1998271633","https://openalex.org/W1998964210","https://openalex.org/W2015190123","https://openalex.org/W2063368144","https://openalex.org/W2063540853","https://openalex.org/W2072605585","https://openalex.org/W2074935284","https://openalex.org/W2080461640","https://openalex.org/W2095946447","https://openalex.org/W2115440939","https://openalex.org/W2123686039","https://openalex.org/W2139000699","https://openalex.org/W2261489614","https://openalex.org/W2408388496","https://openalex.org/W2558012959","https://openalex.org/W2574861468","https://openalex.org/W2575168421","https://openalex.org/W2604790380","https://openalex.org/W2612636236","https://openalex.org/W2751338424","https://openalex.org/W2756982556","https://openalex.org/W2788260429","https://openalex.org/W2793317603","https://openalex.org/W2887418847","https://openalex.org/W2898115873","https://openalex.org/W3032251503","https://openalex.org/W3085477028","https://openalex.org/W3085940077","https://openalex.org/W3158312191","https://openalex.org/W3173993340","https://openalex.org/W3174369108","https://openalex.org/W3175055685","https://openalex.org/W4280537071","https://openalex.org/W4281775561","https://openalex.org/W4283784934","https://openalex.org/W4289866506","https://openalex.org/W4367681270","https://openalex.org/W4372263685","https://openalex.org/W4377843267","https://openalex.org/W4383605160","https://openalex.org/W4383749451","https://openalex.org/W4385283100","https://openalex.org/W4389332122","https://openalex.org/W4398234629"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4244478748","https://openalex.org/W2361805396","https://openalex.org/W2972254340","https://openalex.org/W4223488648","https://openalex.org/W2134969820","https://openalex.org/W2251605416","https://openalex.org/W2560439919","https://openalex.org/W4389340727"],"abstract_inverted_index":{"Data":[0],"Lakes":[1],"deployed":[2],"in":[3],"the":[4,15,48,66,102,114,127,137],"cloud":[5,44],"are":[6],"a":[7,61,92,130,146],"go-to":[8],"solution":[9,63],"for":[10],"enterprise":[11],"data":[12,49,73,79],"storage.":[13,39],"While":[14],"pay-as-you-go":[16],"cost":[17],"model":[18],"allows":[19],"flexible":[20],"resource":[21],"allocation":[22],"and":[23,37,50],"billing,":[24],"it":[25],"mandates":[26],"an":[27],"efficient":[28],"use":[29],"of":[30,43,68],"resources":[31],"like":[32],"CPU":[33],"hours,":[34],"network":[35],"traffic,":[36],"used":[38],"The":[40],"distributed":[41],"nature":[42],"environments":[45],"necessitates":[46],"partitioning":[47],"processing":[51],"these":[52],"partitions":[53,86],"separately.":[54],"In":[55],"this":[56],"work,":[57],"we":[58],"put":[59],"forward":[60],"practical":[62],"to":[64,108,118],"improve":[65],"efficiency":[67],"compression":[69,115,131],"algorithms":[70],"on":[71],"Dremel-encoded":[72],"by":[74,96,106,116],"clustering":[75,93],"similarly":[76],"structured":[77],"nested":[78],"at":[80,145],"ingestion":[81,148],"time,":[82],"such":[83],"that":[84,99,124,133],"compressible":[85],"can":[87,141],"be":[88,142],"created.":[89],"We":[90,121],"propose":[91],"approach":[94,105],"inspired":[95],"decision":[97],"trees":[98],"outpaces":[100],"even":[101],"naive":[103],"partition-then-sort":[104],"up":[107,117],"factor":[109,119],"17.44":[110],"while":[111],"also":[112],"boosting":[113],"2.":[120],"further":[122],"show":[123],"when":[125],"sorting":[126],"individual":[128],"buckets,":[129],"boost":[132],"is":[134],"competitive":[135],"with":[136],"well-established":[138],"increasing-cardinality":[139],"heuristic":[140],"achieved,":[143],"but":[144],"lower":[147],"time.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
