{"id":"https://openalex.org/W4319586544","doi":"https://doi.org/10.1109/dsaa54385.2022.10032386","title":"Separating Storage and Compute with the Databricks Lakehouse Platform","display_name":"Separating Storage and Compute with the Databricks Lakehouse Platform","publication_year":2022,"publication_date":"2022-10-13","ids":{"openalex":"https://openalex.org/W4319586544","doi":"https://doi.org/10.1109/dsaa54385.2022.10032386"},"language":"en","primary_location":{"id":"doi:10.1109/dsaa54385.2022.10032386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dsaa54385.2022.10032386","pdf_url":null,"source":{"id":"https://openalex.org/S4363608340","display_name":"2022 IEEE 9th International Conference on Data Science and Advanced Analytics (DSAA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 9th International Conference on Data Science and Advanced Analytics (DSAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062654062","display_name":"Deeptaanshu Kumar","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Deeptaanshu Kumar","raw_affiliation_strings":["Carnegie Mellon University,Electrical &amp; Computer Engineering,Washington DC,USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Electrical &amp; Computer Engineering,Washington DC,USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057852052","display_name":"Suxi Li","orcid":null},"institutions":[{"id":"https://openalex.org/I145608581","display_name":"University of Miami","ror":"https://ror.org/02dgjyy92","country_code":"US","type":"education","lineage":["https://openalex.org/I145608581"]},{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suxi Li","raw_affiliation_strings":["Economics University of Miami,Los Angeles,USA","Economics University of Miami, Los Angeles, USA"],"affiliations":[{"raw_affiliation_string":"Economics University of Miami,Los Angeles,USA","institution_ids":["https://openalex.org/I145608581","https://openalex.org/I161318765"]},{"raw_affiliation_string":"Economics University of Miami, Los Angeles, USA","institution_ids":["https://openalex.org/I145608581"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5062654062"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":1.8014,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.87091591,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7747593522071838},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7367318272590637},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.720844030380249},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.706642746925354},{"id":"https://openalex.org/keywords/computer-data-storage","display_name":"Computer data storage","score":0.6406713724136353},{"id":"https://openalex.org/keywords/data-warehouse","display_name":"Data warehouse","score":0.6033102869987488},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5498952865600586},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4295791685581207},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4190386235713959},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4041220545768738},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3682883381843567},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.36357009410858154},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13912123441696167}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7747593522071838},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7367318272590637},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.720844030380249},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.706642746925354},{"id":"https://openalex.org/C194739806","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Computer data storage","level":2,"score":0.6406713724136353},{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.6033102869987488},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5498952865600586},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4295791685581207},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4190386235713959},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4041220545768738},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3682883381843567},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36357009410858154},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13912123441696167},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dsaa54385.2022.10032386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dsaa54385.2022.10032386","pdf_url":null,"source":{"id":"https://openalex.org/S4363608340","display_name":"2022 IEEE 9th International Conference on Data Science and Advanced Analytics (DSAA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 9th International Conference on Data Science and Advanced Analytics (DSAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6200000047683716}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2364921833","https://openalex.org/W2382623646","https://openalex.org/W2385146268","https://openalex.org/W1596201972","https://openalex.org/W1788737569","https://openalex.org/W1986253068","https://openalex.org/W1767718647","https://openalex.org/W2380023786","https://openalex.org/W1588015694","https://openalex.org/W2090871327"],"abstract_inverted_index":{"As":[0,112],"a":[1,14,45,113],"part":[2],"of":[3,59,84,109,187],"The":[4],"Arena":[5],"Group\u2019s":[6],"Data":[7,23,26,140,167,189,192],"&":[8],"AI":[9],"Team,":[10],"we":[11,40,115,173,183],"are":[12,41],"architecting":[13],"new":[15],"unified":[16],"data":[17,49,61,77,86,120],"platform":[18,50],"that":[19,51,73,122],"can":[20],"handle":[21],"both":[22],"Engineering":[24,141,190],"and":[25,47,62,65,92,125,130,149,180,191],"Science":[27,168,193],"use":[28,199],"cases":[29],"for":[30,99,166],"all":[31],"the":[32,82,106],"company\u2019s":[33],"needs.":[34,169],"In":[35,170],"order":[36],"to":[37,43,55,97,104,118,195],"accomplish":[38],"this,":[39],"working":[42],"create":[44],"scalable":[46],"cost-effective":[48],"will":[52,174,184],"allow":[53],"us":[54,96,151,163],"store":[56,105],"large":[57],"volumes":[58],"historical":[60,110],"process,":[63],"transform,":[64],"query":[66],"it":[67,89,136],"with":[68,153],"variable":[69],"workloads.":[70],"This":[71],"means":[72],"our":[74,85,139,177,197],"current":[75],"Redshift":[76],"warehouse":[78],"cannot":[79],"serve":[80],"as":[81,128],"backbone":[83],"platform,":[87],"since":[88],"couples":[90],"storage":[91,124,145],"compute,":[93,126],"which":[94],"forces":[95],"pay":[98],"increased":[100],"compute":[101,154],"nodes":[102],"just":[103],"growing":[107],"amounts":[108],"data.":[111],"result,":[114],"set":[116],"out":[117],"explore":[119],"platforms":[121],"decoupled":[123],"such":[127],"Snowflake":[129],"Databricks.":[131],"We":[132],"chose":[133],"Databricks":[134],"because":[135],"adequately":[137],"serves":[138],"needs":[142],"by":[143,155],"keeping":[144],"on":[146],"AWS":[147],"S3":[148],"gives":[150],"flexibility":[152],"using":[156],"ad-hoc":[157],"Spark":[158],"clusters.":[159],"It":[160],"also":[161],"offers":[162],"more":[164],"capabilities":[165,194],"this":[171],"paper,":[172],"go":[175],"over":[176],"proposed":[178],"architecture":[179],"explain":[181],"how":[182],"take":[185],"advantage":[186],"these":[188],"address":[196],"initial":[198],"cases.":[200]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
