{"id":"https://openalex.org/W2044496401","doi":"https://doi.org/10.1145/2484838.2484884","title":"Making sense of big data with the Berkeley data analytics stack","display_name":"Making sense of big data with the Berkeley data analytics stack","publication_year":2013,"publication_date":"2013-07-29","ids":{"openalex":"https://openalex.org/W2044496401","doi":"https://doi.org/10.1145/2484838.2484884","mag":"2044496401"},"language":"en","primary_location":{"id":"doi:10.1145/2484838.2484884","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2484838.2484884","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102019637","display_name":"Michael J. Franklin","orcid":"https://orcid.org/0000-0002-2903-9115"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael J. Franklin","raw_affiliation_strings":["University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5102019637"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":3.1579,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.91631799,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9708999991416931,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9708999991416931,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9545999765396118,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.9334999918937683,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7425438761711121},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.733748733997345},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.6749131083488464},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6087437272071838},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5765396356582642},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.5419541001319885},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.45143845677375793},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.4153987765312195},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.18356174230575562},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.14997434616088867},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13745900988578796}],"concepts":[{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7425438761711121},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.733748733997345},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.6749131083488464},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6087437272071838},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5765396356582642},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.5419541001319885},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.45143845677375793},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.4153987765312195},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.18356174230575562},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.14997434616088867},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13745900988578796},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2484838.2484884","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2484838.2484884","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W17155033","https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W4388692845","https://openalex.org/W3202731209","https://openalex.org/W3211874991","https://openalex.org/W4308507533","https://openalex.org/W2407107767","https://openalex.org/W2901787049","https://openalex.org/W3191926225"],"abstract_inverted_index":{"The":[0,51,141],"Berkeley":[1,160],"AMPLab":[2],"was":[3],"founded":[4],"on":[5,125,222,237,285],"the":[6,9,27,33,54,60,82,131,148,159,165,169,193,198,204,214,223,246,255,267,280,298],"idea":[7],"that":[8,43,226,276],"challenges":[10],"of":[11,48,53,68,84,133,150,179,182,217,243,264],"emerging":[12],"Big":[13],"Data":[14,161],"applications":[15],"require":[16],"a":[17,111,151,274,286],"new":[18],"approach":[19],"to":[20,31,57,230,272,283,290],"analytics":[21,35,299],"systems.":[22],"Launching":[23],"in":[24,173,189],"early":[25],"2011,":[26],"project":[28],"set":[29],"out":[30],"rethink":[32],"traditional":[34],"stack,":[36],"breaking":[37],"down":[38],"technical":[39],"and":[40,77,87,91,97,127,129,137,191,203,241,294],"intellectual":[41],"barriers":[42],"had":[44],"arisen":[45],"during":[46],"decades":[47],"evolutionary":[49],"development.":[50],"vision":[52],"lab":[55,142,170],"is":[56,143],"seamlessly":[58],"integrate":[59],"three":[61,167],"main":[62],"resources":[63,282],"available":[64],"for":[65],"making":[66],"sense":[67],"data":[69],"at":[70],"scale:":[71],"Algorithms":[72],"(such":[73],"as":[74,95,100,249],"machine":[75,238],"learning":[76,239],"statistical":[78],"techniques),":[79],"Machines":[80],"(in":[81],"form":[83,273],"scalable":[85],"clusters":[86],"elastic":[88],"cloud":[89],"computing),":[90],"People":[92],"(both":[93],"individually":[94],"analysts":[96],"en":[98],"masse,":[99],"with":[101,114,122,219],"crowd-sourced":[102],"human":[103],"computation).":[104],"To":[105],"pursue":[106],"this":[107,210],"goal,":[108],"we":[109],"assembled":[110],"research":[112],"team":[113],"diverse":[115],"interests":[116],"across":[117],"computer":[118],"science,":[119],"forged":[120],"relationships":[121],"domain":[123],"experts":[124],"campus":[126],"elsewhere,":[128],"obtained":[130],"support":[132],"leading":[134],"industry":[135,190],"partners":[136],"major":[138,177],"government":[139],"sponsors.":[140],"realizing":[144],"its":[145],"ideas":[146],"through":[147],"development":[149],"freely-available":[152],"Open":[153],"Source":[154],"software":[155],"stack":[156],"called":[157],"BDAS:":[158],"Analytics":[162],"Stack.":[163],"In":[164,209],"nearly":[166],"years":[168],"has":[171],"been":[172,228],"operation,":[174],"we've":[175],"released":[176,229],"components":[178,184,225],"BDAS.":[180],"Several":[181],"these":[183],"have":[185,227],"gained":[186],"significant":[187],"traction":[188],"elsewhere:":[192],"Mesos":[194],"cluster":[195],"resource":[196],"manager,":[197],"Spark":[199],"in-memory":[200],"computation":[201],"framework,":[202],"Shark":[205],"query":[206],"processing":[207],"system.":[208],"talk":[211],"I'll":[212,232],"describe":[213],"current":[215],"state":[216],"BDAS":[218],"an":[220],"emphasis":[221],"key":[224],"date.":[231],"then":[233],"discuss":[234],"ongoing":[235],"efforts":[236],"scalability":[240],"ease":[242],"use,":[244],"including":[245],"MLbase":[247],"system,":[248],"our":[250,261],"focus":[251],"moves":[252],"higher":[253],"up":[254],"stack.":[256],"Finally":[257],"I":[258],"will":[259,269],"present":[260],"longer-term":[262],"views":[263],"how":[265],"all":[266],"pieces":[268],"fit":[270],"together":[271],"system":[275],"can":[277],"adaptively":[278],"bring":[279],"right":[281],"bear":[284],"given":[287],"data-driven":[288],"question":[289],"meet":[291],"time,":[292],"cost":[293],"quality":[295],"requirements":[296],"throughout":[297],"lifecycle.":[300]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
