{"id":"https://openalex.org/W4282570934","doi":"https://doi.org/10.1145/3514221.3526186","title":"Materialization and Reuse Optimizations for Production Data Science Pipelines","display_name":"Materialization and Reuse Optimizations for Production Data Science Pipelines","publication_year":2022,"publication_date":"2022-06-10","ids":{"openalex":"https://openalex.org/W4282570934","doi":"https://doi.org/10.1145/3514221.3526186"},"language":"en","primary_location":{"id":"doi:10.1145/3514221.3526186","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3514221.3526186","pdf_url":null,"source":{"id":"https://openalex.org/S4363608845","display_name":"Proceedings of the 2022 International Conference on Management of Data","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003218472","display_name":"Behrouz Derakhshan","orcid":null},"institutions":[{"id":"https://openalex.org/I33256026","display_name":"German Research Centre for Artificial Intelligence","ror":"https://ror.org/01ayc5b57","country_code":"DE","type":"funder","lineage":["https://openalex.org/I33256026"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Behrouz Derakhshan","raw_affiliation_strings":["DFKI GmbH, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"DFKI GmbH, Berlin, Germany","institution_ids":["https://openalex.org/I33256026"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087402246","display_name":"Alireza Rezaei Mahdiraji","orcid":null},"institutions":[{"id":"https://openalex.org/I4210144380","display_name":"Yara (Germany)","ror":"https://ror.org/039hegs97","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210144380","https://openalex.org/I67171562"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alireza Rezaei Mahdiraji","raw_affiliation_strings":["Yara Digital Production, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Yara Digital Production, Berlin, Germany","institution_ids":["https://openalex.org/I4210144380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068470780","display_name":"Zoi Kaoudi","orcid":"https://orcid.org/0000-0003-4520-5360"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Zoi Kaoudi","raw_affiliation_strings":["TU Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"TU Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002030730","display_name":"Tilmann Rabl","orcid":"https://orcid.org/0009-0009-3335-8045"},"institutions":[{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tilmann Rabl","raw_affiliation_strings":["HPI &amp; University of Potsdam, Potsdam, Germany"],"affiliations":[{"raw_affiliation_string":"HPI &amp; University of Potsdam, Potsdam, Germany","institution_ids":["https://openalex.org/I176453806"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002413906","display_name":"Volker Markl","orcid":"https://orcid.org/0009-0009-0964-026X"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Volker Markl","raw_affiliation_strings":["TU Berlin &amp; DFKI GmbH, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"TU Berlin &amp; DFKI GmbH, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5003218472"],"corresponding_institution_ids":["https://openalex.org/I33256026"],"apc_list":null,"apc_paid":null,"fwci":0.602,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.7638671,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1962","last_page":"1976"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.8299133777618408},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.798443078994751},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.7923884391784668},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.742397665977478},{"id":"https://openalex.org/keywords/directed-acyclic-graph","display_name":"Directed acyclic graph","score":0.6259915828704834},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.5547811985015869},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.47846388816833496},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4552232027053833},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3505572974681854},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3287278711795807},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.25276029109954834},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16842305660247803},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.1683274507522583},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1349637508392334},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11202216148376465},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10788944363594055}],"concepts":[{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.8299133777618408},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.798443078994751},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.7923884391784668},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.742397665977478},{"id":"https://openalex.org/C74197172","wikidata":"https://www.wikidata.org/wiki/Q1195339","display_name":"Directed acyclic graph","level":2,"score":0.6259915828704834},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.5547811985015869},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.47846388816833496},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4552232027053833},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3505572974681854},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3287278711795807},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.25276029109954834},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16842305660247803},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.1683274507522583},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1349637508392334},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11202216148376465},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10788944363594055},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0},{"id":"https://openalex.org/C548081761","wikidata":"https://www.wikidata.org/wiki/Q180388","display_name":"Waste management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3514221.3526186","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3514221.3526186","pdf_url":null,"source":{"id":"https://openalex.org/S4363608845","display_name":"Proceedings of the 2022 International Conference on Management of Data","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5400000214576721,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W297231882","https://openalex.org/W1490347130","https://openalex.org/W1934084512","https://openalex.org/W1963445704","https://openalex.org/W2058978608","https://openalex.org/W2060114273","https://openalex.org/W2076618162","https://openalex.org/W2081417442","https://openalex.org/W2085426673","https://openalex.org/W2103201239","https://openalex.org/W2106019582","https://openalex.org/W2121990650","https://openalex.org/W2126547925","https://openalex.org/W2157355837","https://openalex.org/W2166597031","https://openalex.org/W2542459869","https://openalex.org/W2611130659","https://openalex.org/W2645036108","https://openalex.org/W2743948853","https://openalex.org/W2796293253","https://openalex.org/W2798535736","https://openalex.org/W2921455415","https://openalex.org/W2951321531","https://openalex.org/W2970613315","https://openalex.org/W2997591727","https://openalex.org/W2998715488","https://openalex.org/W3000677617","https://openalex.org/W3004286518","https://openalex.org/W3007818867","https://openalex.org/W3011592252","https://openalex.org/W3031462348","https://openalex.org/W3035597669","https://openalex.org/W4240301789"],"related_works":["https://openalex.org/W4380433113","https://openalex.org/W4386072068","https://openalex.org/W252339960","https://openalex.org/W2390529043","https://openalex.org/W2378320433","https://openalex.org/W2358343511","https://openalex.org/W2051877971","https://openalex.org/W1787170397","https://openalex.org/W4292347844","https://openalex.org/W2330191542"],"abstract_inverted_index":{"Many":[0],"companies":[1],"and":[2,5,60,95,152],"businesses":[3],"train":[4,73],"deploy":[6],"machine":[7],"learning":[8],"(ML)":[9],"pipelines":[10,39,65,145],"to":[11,27,35,66,85,122,137,172],"answer":[12],"prediction":[13],"queries.":[14],"In":[15],"many":[16,64],"applications,":[17],"new":[18],"training":[19,168],"data":[20,80,88],"continuously":[21],"becomes":[22],"available.":[23],"A":[24],"typical":[25],"approach":[26],"ensure":[28],"that":[29,111,162],"ML":[30,38],"models":[31],"are":[32],"up-to-date":[33],"is":[34,84],"retrain":[36],"the":[37,47,117,120,124,128,144,154,167],"following":[40],"a":[41,108,113,134,147],"schedule,":[42],"e.g.,":[43],"every":[44],"day":[45],"on":[46],"last":[48],"seven":[49],"days":[50],"of":[51,101,119,127,175],"data.":[52],"Several":[53],"use":[54],"cases,":[55],"such":[56,91],"as":[57],"A/B":[58],"testing":[59],"ensemble":[61],"learning,":[62],"require":[63],"be":[67],"deployed":[68],"in":[69,90],"parallel.":[70],"Existing":[71],"solutions":[72],"each":[74],"pipeline":[75],"separately,":[76],"which":[77],"generates":[78],"redundant":[79,87],"processing.":[81],"Our":[82,98,159],"goal":[83],"eliminate":[86],"processing":[89],"scenarios":[92],"using":[93],"materialization":[94,109],"reuse":[96,135],"optimizations.":[97],"solution":[99,164],"comprises":[100],"two":[102],"main":[103],"parts.":[104],"First,":[105],"we":[106,132],"propose":[107],"algorithm":[110,136],"given":[112],"storage":[114],"budget,":[115],"materializes":[116],"subset":[118],"artifacts":[121,156],"minimize":[123],"run":[125],"time":[126,169],"subsequent":[129],"executions.":[130],"Second,":[131],"design":[133],"generate":[138],"an":[139,173],"execution":[140],"plan":[141],"by":[142,170],"combining":[143],"into":[146],"directed":[148],"acyclic":[149],"graph":[150],"(DAG)":[151],"reusing":[153],"materialized":[155],"when":[157],"appropriate.":[158],"experiments":[160],"show":[161],"our":[163],"can":[165],"reduce":[166],"up":[171],"order":[174],"magnitude":[176],"for":[177],"different":[178],"deployment":[179],"scenarios.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
