{"id":"https://openalex.org/W2971290973","doi":"https://doi.org/10.14778/3342263.3342633","title":"An intermediate representation for optimizing machine learning pipelines","display_name":"An intermediate representation for optimizing machine learning pipelines","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W2971290973","doi":"https://doi.org/10.14778/3342263.3342633","mag":"2971290973"},"language":"en","primary_location":{"id":"doi:10.14778/3342263.3342633","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3342263.3342633","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072690436","display_name":"Andreas Kunft","orcid":"https://orcid.org/0000-0001-7557-1703"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Andreas Kunft","raw_affiliation_strings":["TU Berlin"],"affiliations":[{"raw_affiliation_string":"TU Berlin","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002932353","display_name":"Asterios Katsifodimos","orcid":"https://orcid.org/0000-0002-6717-2945"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Asterios Katsifodimos","raw_affiliation_strings":["Delft University of Technology"],"affiliations":[{"raw_affiliation_string":"Delft University of Technology","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090934117","display_name":"Sebastian Schelter","orcid":"https://orcid.org/0000-0003-4722-5840"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sebastian Schelter","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032404318","display_name":"Sebastian Bre\u00df","orcid":null},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sebastian Bre\u00df","raw_affiliation_strings":["DFKI and TU Berlin"],"affiliations":[{"raw_affiliation_string":"DFKI and TU Berlin","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002030730","display_name":"Tilmann Rabl","orcid":"https://orcid.org/0009-0009-3335-8045"},"institutions":[{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tilmann Rabl","raw_affiliation_strings":["Universit\u00e4t Potsdam"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Potsdam","institution_ids":["https://openalex.org/I176453806"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002413906","display_name":"Volker Markl","orcid":"https://orcid.org/0009-0009-0964-026X"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]},{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]},{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE","NL"],"is_corresponding":false,"raw_author_name":"Volker Markl","raw_affiliation_strings":["DFKI","DFKI + HPI, Universit \u00e4t Potsdam","TU Berlin","Delft University of Technology"],"affiliations":[{"raw_affiliation_string":"DFKI","institution_ids":[]},{"raw_affiliation_string":"DFKI + HPI, Universit \u00e4t Potsdam","institution_ids":["https://openalex.org/I176453806"]},{"raw_affiliation_string":"TU Berlin","institution_ids":["https://openalex.org/I4577782"]},{"raw_affiliation_string":"Delft University of Technology","institution_ids":["https://openalex.org/I98358874"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5072690436"],"corresponding_institution_ids":["https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":13.6706,"has_fulltext":false,"cited_by_count":47,"citation_normalized_percentile":{"value":0.98760547,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"12","issue":"11","first_page":"1553","last_page":"1567"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.782732367515564},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6186229586601257},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.5396552085876465},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4892885386943817},{"id":"https://openalex.org/keywords/external-data-representation","display_name":"External Data Representation","score":0.48859888315200806},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4863637089729309},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.47573065757751465},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.47259387373924255},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4357416033744812},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.42205047607421875},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.4210833013057709},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3774212598800659},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.36250007152557373},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32021400332450867},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.13241249322891235}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.782732367515564},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6186229586601257},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.5396552085876465},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4892885386943817},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.48859888315200806},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4863637089729309},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.47573065757751465},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.47259387373924255},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4357416033744812},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.42205047607421875},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.4210833013057709},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3774212598800659},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.36250007152557373},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32021400332450867},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.13241249322891235},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.14778/3342263.3342633","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3342263.3342633","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:kobv.de-opus4-uni-potsdam:48830","is_oa":false,"landing_page_url":"https://publishup.uni-potsdam.de/frontdoor/index/index/docId/48830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400594","display_name":"publish.UP (University of Potsdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I176453806","host_organization_name":"University of Potsdam","host_organization_lineage":["https://openalex.org/I176453806"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},{"id":"pmh:oai:tudelft.nl:uuid:3970f98f-bcf1-4ead-93f7-2a2b20968bf8","is_oa":false,"landing_page_url":"http://resolver.tudelft.nl/uuid:3970f98f-bcf1-4ead-93f7-2a2b20968bf8","pdf_url":null,"source":{"id":"https://openalex.org/S4306400906","display_name":"Research Repository (Delft University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98358874","host_organization_name":"Delft University of Technology","host_organization_lineage":["https://openalex.org/I98358874"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5199999809265137,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G2231918279","display_name":null,"funder_award_id":"01IS18037A","funder_id":"https://openalex.org/F4320336673","funder_display_name":"Berlin Center for Machine Learning"},{"id":"https://openalex.org/G4696046801","display_name":null,"funder_award_id":"01IS18025A","funder_id":"https://openalex.org/F4320336673","funder_display_name":"Berlin Center for Machine Learning"}],"funders":[{"id":"https://openalex.org/F4320319918","display_name":"York University","ror":"https://ror.org/05fq50484"},{"id":"https://openalex.org/F4320334604","display_name":"Banting and Best Diabetes Centre, University of Toronto","ror":"https://ror.org/03dbr7087"},{"id":"https://openalex.org/F4320336673","display_name":"Berlin Center for Machine Learning","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":78,"referenced_works":["https://openalex.org/W1494930385","https://openalex.org/W1538786304","https://openalex.org/W1561487379","https://openalex.org/W1585035499","https://openalex.org/W1669302834","https://openalex.org/W1680392829","https://openalex.org/W1796676969","https://openalex.org/W1970372442","https://openalex.org/W1985954365","https://openalex.org/W1988425770","https://openalex.org/W1995618084","https://openalex.org/W2027720485","https://openalex.org/W2032775418","https://openalex.org/W2036971997","https://openalex.org/W2040581748","https://openalex.org/W2059267851","https://openalex.org/W2059720667","https://openalex.org/W2070996757","https://openalex.org/W2074702228","https://openalex.org/W2081124914","https://openalex.org/W2101234009","https://openalex.org/W2102458936","https://openalex.org/W2106771621","https://openalex.org/W2114303224","https://openalex.org/W2121810937","https://openalex.org/W2122339407","https://openalex.org/W2146620757","https://openalex.org/W2149127686","https://openalex.org/W2154697693","https://openalex.org/W2164106630","https://openalex.org/W2184623761","https://openalex.org/W2185864411","https://openalex.org/W2186615578","https://openalex.org/W2189162242","https://openalex.org/W2189465200","https://openalex.org/W2208923056","https://openalex.org/W2240938131","https://openalex.org/W2357449897","https://openalex.org/W2395323716","https://openalex.org/W2402144811","https://openalex.org/W2411006959","https://openalex.org/W2431765573","https://openalex.org/W2535724050","https://openalex.org/W2547190417","https://openalex.org/W2547386789","https://openalex.org/W2563724055","https://openalex.org/W2585098096","https://openalex.org/W2585388895","https://openalex.org/W2590246587","https://openalex.org/W2595408825","https://openalex.org/W2743948853","https://openalex.org/W2752640170","https://openalex.org/W2753069234","https://openalex.org/W2766504241","https://openalex.org/W2769962266","https://openalex.org/W2798416929","https://openalex.org/W2798656727","https://openalex.org/W2798725115","https://openalex.org/W2807799957","https://openalex.org/W2889897289","https://openalex.org/W2897128047","https://openalex.org/W2912256776","https://openalex.org/W2926314329","https://openalex.org/W2959792658","https://openalex.org/W2964054286","https://openalex.org/W2981018396","https://openalex.org/W2997591727","https://openalex.org/W3011813038","https://openalex.org/W3103594582","https://openalex.org/W4285719527","https://openalex.org/W4299828299","https://openalex.org/W6635206724","https://openalex.org/W6637216817","https://openalex.org/W6686239164","https://openalex.org/W6713134421","https://openalex.org/W6732959226","https://openalex.org/W6823981278","https://openalex.org/W6995434384"],"related_works":["https://openalex.org/W2989490741","https://openalex.org/W138569904","https://openalex.org/W2367545121","https://openalex.org/W4248881655","https://openalex.org/W2482165163","https://openalex.org/W3010890513","https://openalex.org/W3092506759","https://openalex.org/W2390914021","https://openalex.org/W2389417819","https://openalex.org/W3195278891"],"abstract_inverted_index":{"Machine":[0],"learning":[1],"(ML)":[2],"pipelines":[3,136],"for":[4,76],"model":[5,34],"training":[6,21,35,67],"and":[7,16,29,38,54,78,93,109,113,123,127,137,150],"validation":[8],"typically":[9],"include":[10],"preprocessing,":[11],"such":[12],"as":[13],"data":[14,95,125],"cleaning":[15],"feature":[17],"engineering,":[18],"prior":[19],"to":[20,45,158],"an":[22,159],"ML":[23,55,66,130,139],"model.":[24],"Preprocessing":[25],"combines":[26],"relational":[27],"algebra":[28],"user-defined":[30],"functions":[31],"(UDFs),":[32],"while":[33],"uses":[36],"iterations":[37],"linear":[39],"algebra.":[40],"Current":[41],"systems":[42],"are":[43,57],"tailored":[44],"either":[46],"of":[47,65,120,129,144,156,161],"the":[48,86,100,118,142],"two.":[49],"As":[50],"a":[51,72],"consequence,":[52],"preprocessing":[53,135],"steps":[56],"optimized":[58],"in":[59],"isolation.":[60],"To":[61],"enable":[62,102,106],"holistic":[63],"optimization":[64],"pipelines,":[68],"we":[69],"present":[70],"Lara,":[71],"declarative":[73],"domain-specific":[74,121],"language":[75],"collections":[77],"matrices.":[79],"Lara's":[80],"inter-mediate":[81],"representation":[82],"(IR)":[83],"reflects":[84],"on":[85,99,134,148],"complete":[87],"program,":[88],"i.e.,":[89],"UDFs,":[90],"control":[91],"flow,":[92],"both":[94],"types.":[96],"Two":[97],"views":[98],"IR":[101],"diverse":[103],"optimizations.":[104],"Monads":[105],"operator":[107],"pushdown":[108],"fusion":[110],"across":[111],"type":[112],"loop":[114],"boundaries.":[115],"Combinators":[116],"provide":[117],"semantics":[119],"operators":[122],"optimize":[124],"access":[126],"cross-validation":[128],"algorithms.":[131],"Our":[132],"experiments":[133],"selected":[138],"algorithms":[140],"show":[141],"effects":[143],"our":[145],"proposed":[146],"optimizations":[147],"dense":[149],"sparse":[151],"data,":[152],"which":[153],"achieve":[154],"speedups":[155],"up":[157],"order":[160],"magnitude.":[162]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":6}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
