{"id":"https://openalex.org/W2783523382","doi":"https://doi.org/10.1109/bigdata.2017.8257942","title":"Understanding and optimizing the performance of distributed machine learning applications on apache spark","display_name":"Understanding and optimizing the performance of distributed machine learning applications on apache spark","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2783523382","doi":"https://doi.org/10.1109/bigdata.2017.8257942","mag":"2783523382"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2017.8257942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8257942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1612.01437","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013807314","display_name":"Celestine D\u00fcnner","orcid":"https://orcid.org/0000-0002-9880-7173"},"institutions":[{"id":"https://openalex.org/I4210126328","display_name":"IBM Research - Zurich","ror":"https://ror.org/02js37d36","country_code":"CH","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126328"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Celestine Dunner","raw_affiliation_strings":["IBM Research, Z\u00fcrich, Switzerland"],"affiliations":[{"raw_affiliation_string":"IBM Research, Z\u00fcrich, Switzerland","institution_ids":["https://openalex.org/I4210126328"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040727953","display_name":"Thomas Parnell","orcid":"https://orcid.org/0000-0002-1308-6590"},"institutions":[{"id":"https://openalex.org/I4210126328","display_name":"IBM Research - Zurich","ror":"https://ror.org/02js37d36","country_code":"CH","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126328"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Thomas Parnell","raw_affiliation_strings":["IBM Research, Z\u00fcrich, Switzerland"],"affiliations":[{"raw_affiliation_string":"IBM Research, Z\u00fcrich, Switzerland","institution_ids":["https://openalex.org/I4210126328"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027902223","display_name":"Kubilay Atasu","orcid":"https://orcid.org/0000-0002-4315-6780"},"institutions":[{"id":"https://openalex.org/I4210126328","display_name":"IBM Research - Zurich","ror":"https://ror.org/02js37d36","country_code":"CH","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126328"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Kubilay Atasu","raw_affiliation_strings":["IBM Research, Z\u00fcrich, Switzerland"],"affiliations":[{"raw_affiliation_string":"IBM Research, Z\u00fcrich, Switzerland","institution_ids":["https://openalex.org/I4210126328"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052483328","display_name":"Manolis Sifalakis","orcid":"https://orcid.org/0000-0002-0949-2094"},"institutions":[{"id":"https://openalex.org/I4210126328","display_name":"IBM Research - Zurich","ror":"https://ror.org/02js37d36","country_code":"CH","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126328"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Manolis Sifalakis","raw_affiliation_strings":["IBM Research, Z\u00fcrich, Switzerland"],"affiliations":[{"raw_affiliation_string":"IBM Research, Z\u00fcrich, Switzerland","institution_ids":["https://openalex.org/I4210126328"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007575599","display_name":"Haralampos Pozidis","orcid":"https://orcid.org/0000-0001-5084-6651"},"institutions":[{"id":"https://openalex.org/I4210126328","display_name":"IBM Research - Zurich","ror":"https://ror.org/02js37d36","country_code":"CH","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126328"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Haralampos Pozidis","raw_affiliation_strings":["IBM Research, Z\u00fcrich, Switzerland"],"affiliations":[{"raw_affiliation_string":"IBM Research, Z\u00fcrich, Switzerland","institution_ids":["https://openalex.org/I4210126328"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5013807314"],"corresponding_institution_ids":["https://openalex.org/I4210126328"],"apc_list":null,"apc_paid":null,"fwci":0.8369,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.81757948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"331","last_page":"338"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.9032748341560364},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8459243178367615},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5312119722366333},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5067790150642395},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5015149116516113},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.47180142998695374},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41927939653396606},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.33842530846595764},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3380613327026367},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3361626863479614},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3068746328353882},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20251184701919556}],"concepts":[{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.9032748341560364},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8459243178367615},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5312119722366333},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5067790150642395},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5015149116516113},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.47180142998695374},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41927939653396606},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.33842530846595764},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3380613327026367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3361626863479614},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3068746328353882},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20251184701919556},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata.2017.8257942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8257942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1612.01437","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1612.01437","pdf_url":"https://arxiv.org/pdf/1612.01437","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1612.01437","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1612.01437","pdf_url":"https://arxiv.org/pdf/1612.01437","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.6399999856948853,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W31923072","https://openalex.org/W1123101191","https://openalex.org/W1575350781","https://openalex.org/W1746258828","https://openalex.org/W2060393849","https://openalex.org/W2074702228","https://openalex.org/W2083842231","https://openalex.org/W2131975293","https://openalex.org/W2138243089","https://openalex.org/W2146292423","https://openalex.org/W2148238780","https://openalex.org/W2401974969","https://openalex.org/W2464996709","https://openalex.org/W2556660792","https://openalex.org/W2951781666","https://openalex.org/W2962825295","https://openalex.org/W6634431598","https://openalex.org/W6637806892","https://openalex.org/W6665801690","https://openalex.org/W6679815717","https://openalex.org/W6680402377","https://openalex.org/W6681731484"],"related_works":["https://openalex.org/W1975949872","https://openalex.org/W3159871278","https://openalex.org/W2230552005","https://openalex.org/W2905242764","https://openalex.org/W3109411864","https://openalex.org/W3017846737","https://openalex.org/W4379407450","https://openalex.org/W3003280185","https://openalex.org/W2613379984","https://openalex.org/W3198126144"],"abstract_inverted_index":{"In":[0,65],"this":[1],"paper":[2],"we":[3,71,101,153,189],"explore":[4],"the":[5,19,40,51,60,63,95,108,121,124,139,142,186,195],"performance":[6,42,61,70,99,110,196],"limits":[7],"of":[8,21,50,62,76,82,112,141],"Apache":[9],"Spark":[10,30,52,69,199],"for":[11],"machine":[12,25,165],"learning":[13,26,166],"applications.":[14],"We":[15,46,174],"begin":[16],"by":[17,184],"analyzing":[18],"characteristics":[20],"a":[22,74,192],"state-of-the-art":[23],"distributed":[24,143,163],"algorithm":[27,122,144],"implemented":[28,171],"in":[29,104,172,194],"and":[31,54,89,130,149,157,181,200],"compare":[32],"it":[33,115],"to":[34,67,79,98,106,118,123,160,204],"an":[35],"equivalent":[36],"reference":[37],"implementation":[38,114],"using":[39,177,185],"high":[41],"computing":[43],"framework":[44,53,90],"MPI.":[45],"identify":[47],"critical":[48],"bottlenecks":[49],"carefully":[55,119],"study":[56],"their":[57],"implications":[58],"on":[59,137],"algorithm.":[64],"order":[66,105],"improve":[68],"then":[72],"propose":[73],"number":[75],"practical":[77],"techniques":[78],"alleviate":[80],"some":[81],"its":[83],"overheads.":[84],"However,":[85],"optimizing":[86],"computational":[87],"efficiency":[88],"related":[91],"overheads":[92],"is":[93,116],"not":[94],"only":[96],"key":[97],"\u2014":[100],"demonstrate":[102,182],"that":[103,168,183],"get":[107],"best":[109],"out":[111],"any":[113],"necessary":[117],"tune":[120],"respective":[125],"trade-off":[126,135],"between":[127,198],"computation":[128],"time":[129],"communication":[131],"latency.":[132],"The":[133],"optimal":[134],"depends":[136],"both":[138],"properties":[140],"as":[145,147],"well":[146],"infrastructure":[148],"framework-related":[150],"characteristics.":[151],"Finally,":[152],"apply":[154],"these":[155],"technical":[156],"algorithmic":[158],"optimizations":[159],"three":[161],"different":[162],"linear":[164],"algorithms":[167],"have":[169],"been":[170],"Spark.":[173],"present":[175],"results":[176],"five":[178],"large":[179],"datasets":[180],"proposed":[187],"optimizations,":[188],"can":[190],"achieve":[191],"reduction":[193],"difference":[197],"MPI":[201],"from":[202],"20x":[203],"2x.":[205]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1}],"updated_date":"2026-03-30T06:00:46.510872","created_date":"2018-01-26T00:00:00"}
