{"id":"https://openalex.org/W4404181225","doi":"https://doi.org/10.14778/3685800.3685834","title":"Petabyte-Scale Row-Level Operations in Data Lakehouses","display_name":"Petabyte-Scale Row-Level Operations in Data Lakehouses","publication_year":2024,"publication_date":"2024-08-01","ids":{"openalex":"https://openalex.org/W4404181225","doi":"https://doi.org/10.14778/3685800.3685834"},"language":"en","primary_location":{"id":"doi:10.14778/3685800.3685834","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3685800.3685834","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114567538","display_name":"Anton Okolnychyi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Anton Okolnychyi","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081304869","display_name":"Chao Sun","orcid":"https://orcid.org/0009-0000-8192-3496"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chao Sun","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114567539","display_name":"Kazuyuki Tanimura","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kazuyuki Tanimura","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090608628","display_name":"Robert Spitzer","orcid":"https://orcid.org/0000-0003-2753-1912"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Russell Spitzer","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114567540","display_name":"Ryan Blue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ryan Blue","raw_affiliation_strings":["Tabular"],"affiliations":[{"raw_affiliation_string":"Tabular","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049051097","display_name":"Shirley Ho","orcid":"https://orcid.org/0000-0002-1068-160X"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Szehon Ho","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112602230","display_name":"Yongjie Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yufei Gu","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114567541","display_name":"Vishwanath Lakkundi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vishwanath Lakkundi","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003563618","display_name":"DB Tsai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"DB Tsai","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5114567538"],"corresponding_institution_ids":["https://openalex.org/I4210107260"],"apc_list":null,"apc_paid":null,"fwci":1.4653,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.84046131,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"17","issue":"12","first_page":"4159","last_page":"4172"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/petabyte","display_name":"Petabyte","score":0.9648278951644897},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.7056640386581421},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4657873213291168},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.32801273465156555},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2623808979988098},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.22079381346702576},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18198105692863464},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.13696718215942383},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.13252276182174683}],"concepts":[{"id":"https://openalex.org/C13600138","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Petabyte","level":3,"score":0.9648278951644897},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.7056640386581421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4657873213291168},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.32801273465156555},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2623808979988098},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.22079381346702576},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18198105692863464},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.13696718215942383},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.13252276182174683}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3685800.3685834","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3685800.3685834","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1519039476","https://openalex.org/W1585029481","https://openalex.org/W1967091776","https://openalex.org/W1993892970","https://openalex.org/W2035543557","https://openalex.org/W2038412523","https://openalex.org/W2068739275","https://openalex.org/W2088398920","https://openalex.org/W2102294813","https://openalex.org/W2117546628","https://openalex.org/W2140453381","https://openalex.org/W2145195191","https://openalex.org/W2159886933","https://openalex.org/W2173213060","https://openalex.org/W2294209438","https://openalex.org/W2430301697","https://openalex.org/W2433128352","https://openalex.org/W2542459869","https://openalex.org/W2725226870","https://openalex.org/W2912516925","https://openalex.org/W2925032266","https://openalex.org/W3004616691","https://openalex.org/W3007024586","https://openalex.org/W3084687783","https://openalex.org/W3085477028","https://openalex.org/W3085940077","https://openalex.org/W3198412807"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W1538652242","https://openalex.org/W2011521129","https://openalex.org/W4379164835","https://openalex.org/W2461968736","https://openalex.org/W2031298432","https://openalex.org/W4386544342","https://openalex.org/W2270443735"],"abstract_inverted_index":{"Data":[0],"lakehouses":[1],"combine":[2],"the":[3,15,58,104,115,121,126],"almost":[4],"infinite":[5],"scale":[6],"and":[7,17,34,50,66,86,102,118],"diverse":[8],"tooling":[9],"of":[10,19,46,106,120,138],"a":[11,20],"data":[12,21,29,75,108],"lake":[13],"with":[14,37,73,93,99,109],"reliability":[16],"functionality":[18],"warehouse.":[22],"This":[23],"paper":[24,79],"presents":[25],"extensions":[26],"that":[27,69,129],"enhance":[28],"lake-houses":[30],"using":[31],"Apache":[32,35],"Iceberg":[33],"Spark":[36],"performant":[38],"petabyte-scale":[39],"row-level":[40,88],"operations.":[41],"The":[42,78],"framework":[43],"is":[44],"capable":[45],"handling":[47],"both":[48],"high-density":[49],"sparse":[51],"modifications":[52],"by":[53],"either":[54],"materializing":[55],"changes":[56],"at":[57],"file":[59],"level":[60],"during":[61,76],"writes":[62],"or":[63],"producing":[64],"equality":[65],"position":[67],"deletes":[68],"are":[70],"lazily":[71],"merged":[72],"existing":[74],"reads.":[77],"also":[80,134],"outlines":[81],"essential":[82],"improvements":[83],"in":[84,141],"determining":[85],"applying":[87],"changes:":[89],"eliminating":[90],"expensive":[91],"shuffles":[92],"storage-partitioned":[94],"joins,":[95],"minimizing":[96],"write":[97],"amplification":[98],"runtime":[100],"filtering,":[101],"optimizing":[103],"layout":[105],"output":[107],"adaptive":[110],"writes.":[111],"Our":[112],"evaluation":[113],"demonstrates":[114],"relative":[116],"strengths":[117],"weaknesses":[119],"various":[122],"materialization":[123],"strategies,":[124],"highlighting":[125],"use":[127],"cases":[128],"require":[130],"each":[131],"technique.":[132],"We":[133],"show":[135],"an":[136],"order":[137],"magnitude":[139],"improvement":[140],"performance":[142],"after":[143],"our":[144],"enhancements.":[145]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
