{"id":"https://openalex.org/W2126623642","doi":"https://doi.org/10.1186/s40537-015-0032-1","title":"A survey of open source tools for machine learning with big data in the Hadoop ecosystem","display_name":"A survey of open source tools for machine learning with big data in the Hadoop ecosystem","publication_year":2015,"publication_date":"2015-11-05","ids":{"openalex":"https://openalex.org/W2126623642","doi":"https://doi.org/10.1186/s40537-015-0032-1","mag":"2126623642"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-015-0032-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-015-0032-1","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-015-0032-1","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-015-0032-1","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010761641","display_name":"Sara Landset","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sara Landset","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, FL, 33431, USA","[Florida Atlantic University, Boca Raton, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, FL, 33431, USA","institution_ids":["https://openalex.org/I63772739"]},{"raw_affiliation_string":"[Florida Atlantic University, Boca Raton, USA]","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089170562","display_name":"Taghi M. Khoshgoftaar","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taghi M. Khoshgoftaar","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, FL, 33431, USA","[Florida Atlantic University, Boca Raton, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, FL, 33431, USA","institution_ids":["https://openalex.org/I63772739"]},{"raw_affiliation_string":"[Florida Atlantic University, Boca Raton, USA]","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044553634","display_name":"Aaron N. Richter","orcid":"https://orcid.org/0000-0003-3269-867X"},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aaron N. Richter","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, FL, 33431, USA","[Florida Atlantic University, Boca Raton, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, FL, 33431, USA","institution_ids":["https://openalex.org/I63772739"]},{"raw_affiliation_string":"[Florida Atlantic University, Boca Raton, USA]","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079873660","display_name":"Tawfiq Hasanin","orcid":"https://orcid.org/0000-0003-1072-278X"},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tawfiq Hasanin","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, FL, 33431, USA","[Florida Atlantic University, Boca Raton, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, FL, 33431, USA","institution_ids":["https://openalex.org/I63772739"]},{"raw_affiliation_string":"[Florida Atlantic University, Boca Raton, USA]","institution_ids":["https://openalex.org/I63772739"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5010761641"],"corresponding_institution_ids":["https://openalex.org/I63772739"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":121.1713,"has_fulltext":true,"cited_by_count":435,"citation_normalized_percentile":{"value":0.99972979,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"2","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7946040630340576},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7687138319015503},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.7520843744277954},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4865676462650299},{"id":"https://openalex.org/keywords/open-platform","display_name":"Open platform","score":0.4465833306312561},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.33521568775177},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.26023685932159424},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.24860438704490662},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.13103386759757996}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7946040630340576},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7687138319015503},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.7520843744277954},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4865676462650299},{"id":"https://openalex.org/C108383078","wikidata":"https://www.wikidata.org/wiki/Q7096399","display_name":"Open platform","level":3,"score":0.4465833306312561},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.33521568775177},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26023685932159424},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.24860438704490662},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.13103386759757996}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1186/s40537-015-0032-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-015-0032-1","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-015-0032-1","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1186/s40537-015-0032-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-015-0032-1","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-015-0032-1","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5416735113","display_name":"MRI: Acquisition of Big Data Training and Research Laboratory","funder_award_id":"1427536","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6293531692","display_name":null,"funder_award_id":"CNS-1427536","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2126623642.pdf","grobid_xml":"https://content.openalex.org/works/W2126623642.grobid-xml"},"referenced_works_count":95,"referenced_works":["https://openalex.org/W81419089","https://openalex.org/W107711226","https://openalex.org/W266253324","https://openalex.org/W268269188","https://openalex.org/W303513970","https://openalex.org/W603723648","https://openalex.org/W866281365","https://openalex.org/W1485408073","https://openalex.org/W1530955034","https://openalex.org/W1635892993","https://openalex.org/W1746258828","https://openalex.org/W1788418780","https://openalex.org/W1874755017","https://openalex.org/W1965902027","https://openalex.org/W1966894438","https://openalex.org/W1966979133","https://openalex.org/W1984459577","https://openalex.org/W1991557405","https://openalex.org/W1992795012","https://openalex.org/W2001378540","https://openalex.org/W2006613599","https://openalex.org/W2006801410","https://openalex.org/W2009537245","https://openalex.org/W2010462324","https://openalex.org/W2013344760","https://openalex.org/W2016653720","https://openalex.org/W2028598612","https://openalex.org/W2029237784","https://openalex.org/W2030548314","https://openalex.org/W2038571938","https://openalex.org/W2043267967","https://openalex.org/W2044102377","https://openalex.org/W2044439547","https://openalex.org/W2044849727","https://openalex.org/W2046864590","https://openalex.org/W2047507037","https://openalex.org/W2049391671","https://openalex.org/W2050668158","https://openalex.org/W2055647373","https://openalex.org/W2062106211","https://openalex.org/W2062970599","https://openalex.org/W2074185344","https://openalex.org/W2076663617","https://openalex.org/W2078488685","https://openalex.org/W2078945459","https://openalex.org/W2083476508","https://openalex.org/W2086239440","https://openalex.org/W2089442574","https://openalex.org/W2090732649","https://openalex.org/W2094848836","https://openalex.org/W2098035320","https://openalex.org/W2104564392","https://openalex.org/W2105768086","https://openalex.org/W2105947650","https://openalex.org/W2113236773","https://openalex.org/W2118023920","https://openalex.org/W2119168155","https://openalex.org/W2121456247","https://openalex.org/W2121810937","https://openalex.org/W2122465391","https://openalex.org/W2131151640","https://openalex.org/W2131391419","https://openalex.org/W2140336868","https://openalex.org/W2141551277","https://openalex.org/W2142466236","https://openalex.org/W2146762855","https://openalex.org/W2153246527","https://openalex.org/W2161483952","https://openalex.org/W2163465797","https://openalex.org/W2170413589","https://openalex.org/W2170616854","https://openalex.org/W2183669956","https://openalex.org/W2184533611","https://openalex.org/W2184623761","https://openalex.org/W2189465200","https://openalex.org/W2207770721","https://openalex.org/W2212755431","https://openalex.org/W2248371688","https://openalex.org/W2280624358","https://openalex.org/W2313203347","https://openalex.org/W2324749769","https://openalex.org/W2325507490","https://openalex.org/W2340222647","https://openalex.org/W2394144822","https://openalex.org/W2406734291","https://openalex.org/W2408198315","https://openalex.org/W2468853426","https://openalex.org/W2545264114","https://openalex.org/W2550375374","https://openalex.org/W2570990685","https://openalex.org/W2913854892","https://openalex.org/W2914040074","https://openalex.org/W2952644771","https://openalex.org/W4213078593","https://openalex.org/W6677839623"],"related_works":["https://openalex.org/W4322629366","https://openalex.org/W2808989540","https://openalex.org/W2397053934","https://openalex.org/W1039292361","https://openalex.org/W2731626691","https://openalex.org/W2551093110","https://openalex.org/W2148016376","https://openalex.org/W4237919137","https://openalex.org/W3184179822","https://openalex.org/W3095362084"],"abstract_inverted_index":{"With":[0],"an":[1,105,159],"ever-increasing":[2],"amount":[3],"of":[4,8,85,98,107,112,146,152,161,173,182,217],"options,":[5],"the":[6,60,108,120,127,138,147,169,251,264],"task":[7],"selecting":[9],"machine":[10,41,66,155,198],"learning":[11,42,67,156,199],"tools":[12,21,39,268],"for":[13,40,100,141,275],"big":[14,72],"data":[15,33],"can":[16],"be":[17],"difficult.":[18],"The":[19,31],"available":[20],"have":[22,28,81],"advantages":[23,109,170],"and":[24,26,37,51,110,122,158,171,192,201,207,219,248,269],"drawbacks,":[25],"many":[27,145],"overlapping":[29],"uses.":[30],"world\u2019s":[32],"is":[34,56,69,222],"growing":[35],"rapidly,":[36],"traditional":[38],"are":[43,150],"becoming":[44],"insufficient":[45],"as":[46,214,243,246],"we":[47,134,259],"move":[48],"towards":[49],"distributed":[50],"real-time":[52],"processing.":[53],"This":[54],"paper":[55,94,234],"intended":[57],"to":[58,76,87,137,236],"aid":[59],"researcher":[61],"or":[62],"professional":[63],"who":[64],"understands":[65],"but":[68],"inexperienced":[70],"with":[71,104,179],"data.":[73],"In":[74],"order":[75],"evaluate":[77,208],"tools,":[78],"one":[79],"should":[80],"a":[82,96,142,153,180,229],"thorough":[83],"understanding":[84,160],"what":[86,125,250],"look":[88,143,196],"for.":[89],"To":[90],"that":[91,149,184,226],"end,":[92],"this":[93,116,233,257],"provides":[95],"list":[97],"criteria":[99,212],"making":[101],"selections":[102],"along":[103,178],"analysis":[106],"drawbacks":[111],"each.":[113],"We":[114,167,194],"do":[115],"by":[117,241],"starting":[118],"from":[119],"beginning,":[121],"looking":[123],"at":[124,144,197],"exactly":[126],"term":[128],"\u201cbig":[129],"data\u201d":[130],"means.":[131],"From":[132],"there,":[133],"go":[135],"on":[136,211],"Hadoop":[139],"ecosystem":[140],"projects":[148],"part":[151],"typical":[154],"architecture":[157],"how":[162],"everything":[163],"might":[164],"fit":[165],"together.":[166],"discuss":[168],"disadvantages":[172],"three":[174],"different":[175],"processing":[176],"paradigms":[177],"comparison":[181],"engines":[183],"implement":[185],"them,":[186],"including":[187,203],"MapReduce,":[188],"Spark,":[189],"Flink,":[190],"Storm,":[191],"H2O.":[193],"then":[195],"libraries":[200],"frameworks":[202],"Mahout,":[204],"MLlib,":[205],"SAMOA,":[206],"them":[209],"based":[210],"such":[213],"scalability,":[215],"ease":[216],"use,":[218],"extensibility.":[220],"There":[221],"no":[223],"single":[224],"toolkit":[225],"truly":[227],"embodies":[228],"one-size-fits-all":[230],"solution,":[231],"so":[232],"aims":[235],"help":[237],"make":[238],"decisions":[239],"smoother":[240],"providing":[242],"much":[244],"information":[245],"possible":[247,272],"quantifying":[249],"tradeoffs":[252],"will":[253],"be.":[254],"Additionally,":[255],"throughout":[256],"paper,":[258],"review":[260],"recent":[261],"research":[262],"in":[263],"field":[265],"using":[266],"these":[267],"talk":[270],"about":[271],"future":[273],"directions":[274],"toolkit-based":[276],"learning.":[277]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":21},{"year":2023,"cited_by_count":27},{"year":2022,"cited_by_count":46},{"year":2021,"cited_by_count":62},{"year":2020,"cited_by_count":52},{"year":2019,"cited_by_count":67},{"year":2018,"cited_by_count":72},{"year":2017,"cited_by_count":45},{"year":2016,"cited_by_count":28},{"year":2015,"cited_by_count":2}],"updated_date":"2026-05-28T09:10:13.091523","created_date":"2025-10-10T00:00:00"}
