{"id":"https://openalex.org/W7147428094","doi":"https://doi.org/10.48550/arxiv.2603.27775","title":"Enzyme: Incremental View Maintenance for Data Engineering","display_name":"Enzyme: Incremental View Maintenance for Data Engineering","publication_year":2026,"publication_date":"2026-03-29","ids":{"openalex":"https://openalex.org/W7147428094","doi":"https://doi.org/10.48550/arxiv.2603.27775"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.27775","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.27775","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111014583","display_name":"Ritwik Yadav","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yadav, Ritwik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006533172","display_name":"Supun Abeysinghe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abeysinghe, Supun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132699607","display_name":"Min Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Min","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012564169","display_name":"Jeffrey Helt","orcid":"https://orcid.org/0000-0003-1192-7111"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Helt, Jeffrey","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132693284","display_name":"Manuel Ung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ung, Manuel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100457607","display_name":"Yuhong Chen","orcid":"https://orcid.org/0000-0001-6131-7595"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yuhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132548764","display_name":"Melody Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Melody","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132544516","display_name":"William Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, William","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132562841","display_name":"Yiming Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021911378","display_name":"Tom van Bussel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"van Bussel, Tom","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075573393","display_name":"Sourav Chatterji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chatterji, Sourav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088504528","display_name":"Indrajit Roy","orcid":"https://orcid.org/0000-0002-4766-2664"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roy, Indrajit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132609648","display_name":"Paul Lappas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lappas, Paul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059167302","display_name":"Yannis Papakonstantinou","orcid":"https://orcid.org/0009-0007-6360-9496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Papakonstantinou, Yannis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132559366","display_name":"Tahir Fayyaz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fayyaz, Tahir","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132629874","display_name":"Bilal Aslam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aslam, Bilal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003362597","display_name":"Ross Bunker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bunker, Ross","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073379366","display_name":"Michael Armbrust","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Armbrust, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5052841755","display_name":"Shrikanth Shankar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shankar, Shrikanth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":19,"corresponding_author_ids":["https://openalex.org/A5111014583"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.8968999981880188,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.8968999981880188,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.03970000147819519,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.015399999916553497,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/materialized-view","display_name":"Materialized view","score":0.6887999773025513},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5870000123977661},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.5562000274658203},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5285000205039978},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4244000017642975},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.41179999709129333},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.37130001187324524},{"id":"https://openalex.org/keywords/data-consistency","display_name":"Data consistency","score":0.36390000581741333},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.3357999920845032}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7876999974250793},{"id":"https://openalex.org/C98199447","wikidata":"https://www.wikidata.org/wiki/Q2445044","display_name":"Materialized view","level":4,"score":0.6887999773025513},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5870000123977661},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.5562000274658203},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5285000205039978},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5074999928474426},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4244000017642975},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.41179999709129333},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.37130001187324524},{"id":"https://openalex.org/C93361087","wikidata":"https://www.wikidata.org/wiki/Q4426698","display_name":"Data consistency","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.3357999920845032},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.328900009393692},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.3246000111103058},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3215000033378601},{"id":"https://openalex.org/C33762810","wikidata":"https://www.wikidata.org/wiki/Q461671","display_name":"Data integrity","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.30889999866485596},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.2734000086784363},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.27775","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.27775","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5827076435089111,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Materialized":[0],"views":[1,100,173],"are":[2],"a":[3,30,91,144,161],"core":[4],"construct":[5],"in":[6,34],"database":[7,56],"systems,":[8],"used":[9],"to":[10,50,70,84,95,182],"accelerate":[11],"analytical":[12],"queries":[13],"and":[14,36,48,115,187,199],"optimize":[15],"batch":[16],"pipelines":[17,132],"for":[18,46,169,195],"extract-transform-load":[19],"(ETL)":[20],"workflows.":[21],"Maintaining":[22],"view":[23,40,209],"consistency":[24],"as":[25,101],"underlying":[26],"data":[27,185],"evolves":[28],"is":[29,180],"fundamental":[31],"challenge,":[32],"especially":[33],"high-throughput":[35],"real-time":[37],"settings.":[38],"Incremental":[39],"maintenance":[41],"(IVM)":[42],"has":[43,137],"been":[44],"studied":[45],"decades":[47],"continues":[49],"attract":[51],"significant":[52,217],"investment":[53],"from":[54],"major":[55],"vendors.":[57],"However,":[58],"most":[59],"industrial":[60],"systems":[61],"either":[62],"offer":[63],"limited":[64],"SQL-operator":[65],"coverage":[66],"or":[67],"require":[68],"users":[69,117],"hand-tune":[71],"refresh":[72,107,167,197],"strategies.":[73],"This":[74],"paper":[75],"presents":[76],"Enzyme,":[77],"an":[78],"IVM":[79],"engine":[80],"developed":[81],"at":[82,220],"Databricks":[83],"power":[85],"Spark":[86,157],"Declarative":[87],"Pipelines.":[88],"It":[89],"provides":[90],"built-in,":[92],"end-to-end":[93],"approach":[94],"incremental":[96,196],"pipelines,":[97],"utilizing":[98],"materialized":[99,172,208],"first-class":[102],"building":[103],"blocks.":[104],"By":[105],"automating":[106],"planning,":[108],"Enzyme":[109,159],"reduces":[110],"total":[111],"cost":[112],"of":[113,129,149,151,171],"ownership":[114],"lets":[116],"focus":[118],"on":[119,213],"business":[120],"logic":[121],"rather":[122],"than":[123],"MV":[124],"mechanics.":[125],"Validation":[126],"across":[127,184,207],"thousands":[128],"large-scale":[130],"production":[131],"spanning":[133],"diverse":[134],"application":[135],"domains":[136],"demonstrated":[138],"substantial":[139],"computational":[140],"efficiency":[141],"gains,":[142],"yielding":[143],"cumulative":[145],"daily":[146],"compute":[147],"reduction":[148],"billions":[150],"CPU":[152],"seconds.":[153],"Built":[154],"atop":[155],"Apache":[156],"primitives,":[158],"adds":[160],"cost-based":[162],"optimization":[163],"layer":[164],"that":[165,203],"selects":[166],"strategies":[168],"collections":[170],"organized":[174],"into":[175],"pipelines.":[176],"Enzyme's":[177],"modular":[178],"architecture":[179],"designed":[181],"generalize":[183],"sources":[186],"query":[188],"engines.":[189],"We":[190],"present":[191],"key":[192],"design":[193],"decisions":[194],"planning":[198],"execution,":[200],"including":[201],"optimizations":[202],"exploit":[204],"batching":[205],"opportunities":[206],"sources.":[210],"Experimental":[211],"results":[212],"standard":[214],"benchmarks":[215],"demonstrate":[216],"performance":[218],"improvements":[219],"scale.":[221]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-02T00:00:00"}
