{"id":"https://openalex.org/W3117549991","doi":"https://doi.org/10.1109/mascots50786.2020.9285944","title":"Performance Prediction for Data-driven Workflows on Apache Spark","display_name":"Performance Prediction for Data-driven Workflows on Apache Spark","publication_year":2020,"publication_date":"2020-11-17","ids":{"openalex":"https://openalex.org/W3117549991","doi":"https://doi.org/10.1109/mascots50786.2020.9285944","mag":"3117549991"},"language":"en","primary_location":{"id":"doi:10.1109/mascots50786.2020.9285944","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mascots50786.2020.9285944","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 28th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ieeexplore.ieee.org/document/9285944","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089296485","display_name":"Andrea Gulino","orcid":"https://orcid.org/0000-0003-0201-9461"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Andrea Gulino","raw_affiliation_strings":["Politecnico di Milano"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023498681","display_name":"Arif Canakoglu","orcid":"https://orcid.org/0000-0003-4528-6586"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Arif Canakoglu","raw_affiliation_strings":["Politecnico di Milano"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048332811","display_name":"Stefano Ceri","orcid":"https://orcid.org/0000-0003-0671-2415"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Stefano Ceri","raw_affiliation_strings":["Politecnico di Milano"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039059621","display_name":"Danilo Ardagna","orcid":"https://orcid.org/0000-0003-4224-927X"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Danilo Ardagna","raw_affiliation_strings":["Politecnico di Milano"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5089296485"],"corresponding_institution_ids":["https://openalex.org/I93860229"],"apc_list":null,"apc_paid":null,"fwci":1.3345,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.86825265,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"81","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8518027067184448},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.8436907529830933},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.8170936703681946},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.5321242809295654},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5050516724586487},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.48897087574005127},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.44597721099853516},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4273899495601654},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4219651222229004},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40734928846359253},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3243403434753418},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3237572908401489},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.31137678027153015},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.24334222078323364}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8518027067184448},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.8436907529830933},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.8170936703681946},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.5321242809295654},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5050516724586487},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.48897087574005127},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44597721099853516},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4273899495601654},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4219651222229004},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40734928846359253},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3243403434753418},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3237572908401489},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.31137678027153015},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.24334222078323364},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/mascots50786.2020.9285944","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mascots50786.2020.9285944","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 28th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)","raw_type":"proceedings-article"},{"id":"pmh:oai:re.public.polimi.it:11311/1158359","is_oa":true,"landing_page_url":"https://ieeexplore.ieee.org/document/9285944","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:re.public.polimi.it:11311/1158359","is_oa":true,"landing_page_url":"https://ieeexplore.ieee.org/document/9285944","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W150760603","https://openalex.org/W1663984431","https://openalex.org/W1969389753","https://openalex.org/W1998066559","https://openalex.org/W2001172853","https://openalex.org/W2002472616","https://openalex.org/W2004206314","https://openalex.org/W2026535637","https://openalex.org/W2071598946","https://openalex.org/W2109734800","https://openalex.org/W2127082188","https://openalex.org/W2142680014","https://openalex.org/W2202483651","https://openalex.org/W2245675429","https://openalex.org/W2309679942","https://openalex.org/W2528668861","https://openalex.org/W2591836544","https://openalex.org/W2604856537","https://openalex.org/W2618602062","https://openalex.org/W2620263811","https://openalex.org/W2747362881","https://openalex.org/W2761251889","https://openalex.org/W2796039710","https://openalex.org/W2871948634","https://openalex.org/W2888421737","https://openalex.org/W2900930447","https://openalex.org/W2901935513","https://openalex.org/W2951642938","https://openalex.org/W2970442534","https://openalex.org/W3104906823","https://openalex.org/W6606142028","https://openalex.org/W6697698479","https://openalex.org/W6735916004","https://openalex.org/W6743018549"],"related_works":["https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W3083262785","https://openalex.org/W3202731209","https://openalex.org/W4240326769","https://openalex.org/W3211874991","https://openalex.org/W3012518171","https://openalex.org/W2906268655","https://openalex.org/W4241605045","https://openalex.org/W4379407450"],"abstract_inverted_index":{"Spark":[0,17,94],"is":[1,19],"an":[2,20,83,138],"in-memory":[3],"framework":[4],"for":[5],"implementing":[6],"distributed":[7],"applications":[8,18,46],"of":[9,16,38,82,86,93,125,128,151],"various":[10],"types.":[11],"Predicting":[12],"the":[13,30,65,122],"execution":[14,66,123],"time":[15,124],"important":[21],"but":[22],"challenging":[23],"problem":[24],"that":[25],"has":[26],"been":[27],"tackled":[28],"in":[29,63],"past":[31],"few":[32],"years":[33],"by":[34,73],"several":[35],"studies;":[36],"most":[37],"them":[39],"achieving":[40],"good":[41,120],"prediction":[42],"accuracy":[43],"on":[44,113,141],"simple":[45],"(e.g.":[47],"known":[48,87],"ML":[49,147],"algorithms":[50],"or":[51],"SQL-based":[52],"applications).":[53],"In":[54],"this":[55],"work,":[56],"we":[57,116],"consider":[58],"complex":[59,143],"data-driven":[60],"workflow":[61],"applications,":[62,144],"which":[64],"and":[67,107,131,149],"data":[68],"flow":[69],"can":[70,79,117],"be":[71,80],"modeled":[72],"Directly":[74],"Acyclic":[75],"Graphs":[76],"(DAGs).":[77],"Workflows":[78],"made":[81],"arbitrary":[84],"combination":[85],"tasks,":[88],"each":[89],"applying":[90],"a":[91,102],"set":[92],"operations":[95],"to":[96],"their":[97],"input":[98],"data.":[99],"By":[100],"adopting":[101],"hybrid":[103],"approach,":[104],"combining":[105],"analytical":[106],"machine":[108],"learning":[109],"(ML)":[110],"models,":[111],"trained":[112],"small":[114],"DAGs,":[115],"predict,":[118],"with":[119],"accuracy,":[121],"unseen":[126],"workflows":[127],"higher":[129],"complexity":[130],"size.":[132],"We":[133],"validate":[134],"our":[135],"approach":[136],"through":[137],"extensive":[139],"experimentation":[140],"real-world":[142],"comparing":[145],"different":[146],"models":[148],"choices":[150],"feature":[152],"sets.":[153]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
