{"id":"https://openalex.org/W3112375919","doi":"https://doi.org/10.1186/s40537-020-00388-5","title":"A comprehensive performance analysis of Apache Hadoop and Apache Spark for large scale data sets using HiBench","display_name":"A comprehensive performance analysis of Apache Hadoop and Apache Spark for large scale data sets using HiBench","publication_year":2020,"publication_date":"2020-12-01","ids":{"openalex":"https://openalex.org/W3112375919","doi":"https://doi.org/10.1186/s40537-020-00388-5","mag":"3112375919"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-020-00388-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00388-5","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00388-5","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00388-5","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081116719","display_name":"N. Ahmed","orcid":"https://orcid.org/0000-0001-5663-0042"},"institutions":[{"id":"https://openalex.org/I51158804","display_name":"Massey University","ror":"https://ror.org/052czxv31","country_code":"NZ","type":"education","lineage":["https://openalex.org/I51158804"]}],"countries":["NZ"],"is_corresponding":true,"raw_author_name":"N. Ahmed","raw_affiliation_strings":["School of Natural and Computational Sciences, Massey University, Albany, Auckland, 0745, New Zealand"],"raw_orcid":"https://orcid.org/0000-0001-5663-0042","affiliations":[{"raw_affiliation_string":"School of Natural and Computational Sciences, Massey University, Albany, Auckland, 0745, New Zealand","institution_ids":["https://openalex.org/I51158804"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018208301","display_name":"Andre L. C. Barczak","orcid":"https://orcid.org/0000-0001-7648-285X"},"institutions":[{"id":"https://openalex.org/I51158804","display_name":"Massey University","ror":"https://ror.org/052czxv31","country_code":"NZ","type":"education","lineage":["https://openalex.org/I51158804"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Andre L. C. Barczak","raw_affiliation_strings":["School of Natural and Computational Sciences, Massey University, Albany, Auckland, 0745, New Zealand"],"raw_orcid":"https://orcid.org/0000-0001-7648-285X","affiliations":[{"raw_affiliation_string":"School of Natural and Computational Sciences, Massey University, Albany, Auckland, 0745, New Zealand","institution_ids":["https://openalex.org/I51158804"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037915797","display_name":"Teo Su\u0161njak","orcid":"https://orcid.org/0000-0001-9416-1435"},"institutions":[{"id":"https://openalex.org/I51158804","display_name":"Massey University","ror":"https://ror.org/052czxv31","country_code":"NZ","type":"education","lineage":["https://openalex.org/I51158804"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Teo Susnjak","raw_affiliation_strings":["School of Natural and Computational Sciences, Massey University, Albany, Auckland, 0745, New Zealand"],"raw_orcid":"https://orcid.org/0000-0001-9416-1435","affiliations":[{"raw_affiliation_string":"School of Natural and Computational Sciences, Massey University, Albany, Auckland, 0745, New Zealand","institution_ids":["https://openalex.org/I51158804"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037478522","display_name":"Mohammad A. Rashid","orcid":"https://orcid.org/0000-0002-0844-5819"},"institutions":[{"id":"https://openalex.org/I51158804","display_name":"Massey University","ror":"https://ror.org/052czxv31","country_code":"NZ","type":"education","lineage":["https://openalex.org/I51158804"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Mohammed A. Rashid","raw_affiliation_strings":["Department of Mechanical and Electrical Engineering, Massey University, Auckland, 0745, New Zealand"],"raw_orcid":"https://orcid.org/0000-0002-0844-5819","affiliations":[{"raw_affiliation_string":"Department of Mechanical and Electrical Engineering, Massey University, Auckland, 0745, New Zealand","institution_ids":["https://openalex.org/I51158804"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5081116719"],"corresponding_institution_ids":["https://openalex.org/I51158804"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":12.2523,"has_fulltext":true,"cited_by_count":94,"citation_normalized_percentile":{"value":0.98601412,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"7","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.8996719121932983},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8748669624328613},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.799257218837738},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7119754552841187},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.6676758527755737},{"id":"https://openalex.org/keywords/computer-cluster","display_name":"Computer cluster","score":0.6357097625732422},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.5907273292541504},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4653143882751465},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4536662697792053},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4275496006011963},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.42319005727767944},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.4141637682914734},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4122556149959564},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3674057126045227},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.36182093620300293},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.35318851470947266},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3424184024333954},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.14641577005386353}],"concepts":[{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.8996719121932983},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8748669624328613},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.799257218837738},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7119754552841187},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.6676758527755737},{"id":"https://openalex.org/C29140674","wikidata":"https://www.wikidata.org/wiki/Q206637","display_name":"Computer cluster","level":2,"score":0.6357097625732422},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.5907273292541504},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4653143882751465},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4536662697792053},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4275496006011963},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42319005727767944},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.4141637682914734},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4122556149959564},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3674057126045227},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.36182093620300293},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.35318851470947266},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3424184024333954},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.14641577005386353},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1186/s40537-020-00388-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00388-5","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00388-5","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:ad7fcb9ed6734cc8a4e1ce9e1d44e684","is_oa":true,"landing_page_url":"https://doaj.org/article/ad7fcb9ed6734cc8a4e1ce9e1d44e684","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 7, Iss 1, Pp 1-18 (2020)","raw_type":"article"},{"id":"pmh:oai:mro.massey.ac.nz:10179/16008","is_oa":true,"landing_page_url":"http://hdl.handle.net/10179/16008","pdf_url":null,"source":{"id":"https://openalex.org/S4306402588","display_name":"Massey Research Online (Massey University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I51158804","host_organization_name":"Massey University","host_organization_lineage":["https://openalex.org/I51158804"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1186/s40537-020-00388-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00388-5","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00388-5","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3112375919.pdf","grobid_xml":"https://content.openalex.org/works/W3112375919.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W1582576952","https://openalex.org/W1731459909","https://openalex.org/W1985554463","https://openalex.org/W2024872854","https://openalex.org/W2040728701","https://openalex.org/W2044936152","https://openalex.org/W2057420573","https://openalex.org/W2078082072","https://openalex.org/W2091351040","https://openalex.org/W2109701112","https://openalex.org/W2111364199","https://openalex.org/W2119738171","https://openalex.org/W2126623642","https://openalex.org/W2150478767","https://openalex.org/W2155072926","https://openalex.org/W2173213060","https://openalex.org/W2189465200","https://openalex.org/W2190012392","https://openalex.org/W2237980836","https://openalex.org/W2488300951","https://openalex.org/W2498111289","https://openalex.org/W2519041509","https://openalex.org/W2525726844","https://openalex.org/W2550375374","https://openalex.org/W2556483642","https://openalex.org/W2584112163","https://openalex.org/W2586475558","https://openalex.org/W2726036316","https://openalex.org/W2767478150","https://openalex.org/W2915978573","https://openalex.org/W3104065274","https://openalex.org/W3188047899"],"related_works":["https://openalex.org/W3191926225","https://openalex.org/W2525033434","https://openalex.org/W4280533024","https://openalex.org/W4300992253","https://openalex.org/W1942762218","https://openalex.org/W1985270856","https://openalex.org/W4246264554","https://openalex.org/W2962431048","https://openalex.org/W3112375919","https://openalex.org/W1833720599"],"abstract_inverted_index":{"Abstract":[0],"Big":[1],"Data":[2],"analytics":[3],"for":[4,16,120,160],"storing,":[5],"processing,":[6],"and":[7,28,47,69,97,138,146,185,200,216,249],"analyzing":[8],"large-scale":[9],"datasets":[10],"has":[11,75,228],"become":[12],"an":[13,109,149],"essential":[14],"tool":[15],"the":[17,41,58,70,87,129,142,175,223],"industry.":[18],"The":[19,82,187,220],"advent":[20],"of":[21,37,72,169,177,222],"distributed":[22],"computing":[23],"frameworks":[24,63,176],"such":[25],"as":[26,231],"Hadoop":[27,145,234],"Spark":[29,50,227],"offers":[30],"efficient":[31],"solutions":[32],"to":[33,40,140,173,233,242,251],"analyze":[34],"vast":[35],"amounts":[36],"data.":[38],"Due":[39],"application":[42],"programming":[43],"interface":[44],"(API)":[45],"availability":[46],"its":[48],"performance,":[49],"becomes":[51],"very":[52],"popular,":[53],"even":[54],"more":[55,65],"popular":[56],"than":[57,66],"MapReduce":[59],"framework.":[60],"Both":[61],"these":[62,73,162],"have":[64],"150":[67],"parameters,":[68,132],"combination":[71],"parameters":[74,85,163],"a":[76,157,166],"massive":[77],"impact":[78],"on":[79,165,194,212],"cluster":[80,103,118,151],"performance.":[81],"default":[83,258],"system":[84,88,92,208],"help":[86],"administrator":[89],"deploy":[90],"their":[91,101],"applications":[93],"without":[94],"much":[95],"effort,":[96],"they":[98],"can":[99,113],"measure":[100],"specific":[102],"performance":[104,119,143,188,230],"with":[105],"factory-set":[106],"parameters.":[107],"However,":[108],"open":[110],"question":[111],"remains:":[112],"new":[114],"parameter":[115,218,259],"selection":[116],"improve":[117],"large":[121,167],"datasets?":[122],"In":[123,171],"this":[124,126],"regard,":[125],"study":[127],"investigates":[128],"most":[130],"impacting":[131],"under":[133],"resource":[134],"utilization,":[135],"input":[136,213],"splits,":[137],"shuffle,":[139],"compare":[141],"between":[144],"Spark,":[147],"using":[148],"implemented":[150],"in":[152,246,254],"our":[153],"laboratory.":[154],"We":[155],"used":[156],"trial-and-error":[158],"approach":[159],"tuning":[161],"based":[164,193],"number":[168],"experiments.":[170],"order":[172],"evaluate":[174],"comparative":[178],"analysis,":[179],"we":[180],"select":[181],"two":[182,243],"workloads:":[183],"WordCount":[184,247],"TeraSort.":[186],"metrics":[189],"are":[190,238,261],"carried":[191],"out":[192],"three":[195],"criteria:":[196],"execution":[197],"time,":[198],"throughput,":[199],"speedup.":[201],"Our":[202],"experimental":[203],"results":[204,224],"revealed":[205],"that":[206,226],"both":[207],"performances":[209],"heavily":[210],"depends":[211],"data":[214,236],"size":[215],"correct":[217],"selection.":[219],"analysis":[221],"shows":[225],"better":[229],"compared":[232],"when":[235,257],"sets":[237],"small,":[239],"achieving":[240],"up":[241,250],"times":[244,253],"speedup":[245],"workloads":[248,256],"14":[252],"TeraSort":[255],"values":[260],"reconfigured.":[262]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":31},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":7}],"updated_date":"2026-05-23T08:51:43.019350","created_date":"2025-10-10T00:00:00"}
