{"id":"https://openalex.org/W2114303224","doi":"https://doi.org/10.1145/1559845.1559865","title":"A comparison of approaches to large-scale data analysis","display_name":"A comparison of approaches to large-scale data analysis","publication_year":2009,"publication_date":"2009-06-29","ids":{"openalex":"https://openalex.org/W2114303224","doi":"https://doi.org/10.1145/1559845.1559865","mag":"2114303224"},"language":"en","primary_location":{"id":"doi:10.1145/1559845.1559865","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1559845.1559865","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2009 ACM SIGMOD International Conference on Management of data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049165312","display_name":"Andrew Pavlo","orcid":"https://orcid.org/0000-0001-6040-6991"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Andrew Pavlo","raw_affiliation_strings":["Brown University, Providence, RI, USA"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI, USA","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110571440","display_name":"Erik K. Paulson","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Erik Paulson","raw_affiliation_strings":["University of Wisconsin, Madison, WI, USA"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin, Madison, WI, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007690835","display_name":"Alexander Rasin","orcid":"https://orcid.org/0000-0001-7282-5763"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Rasin","raw_affiliation_strings":["Brown University, Providence, RI, USA"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI, USA","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049333271","display_name":"Daniel J. Abadi","orcid":"https://orcid.org/0000-0003-3771-2995"},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel J. Abadi","raw_affiliation_strings":["Yale University, New Haven, CT, USA"],"affiliations":[{"raw_affiliation_string":"Yale University, New Haven, CT, USA","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055300310","display_name":"David J. DeWitt","orcid":"https://orcid.org/0009-0007-5037-5205"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David J. DeWitt","raw_affiliation_strings":["Microsoft Inc., Madison, WI, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Inc., Madison, WI, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037742794","display_name":"Samuel Madden","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel Madden","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, MA, USA","Massachusetts Institute of Technology; ,; Cambridge MA USA"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"Massachusetts Institute of Technology; ,; Cambridge MA USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074724644","display_name":"Michael Stonebraker","orcid":"https://orcid.org/0000-0001-9184-9058"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Stonebraker","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, MA, USA","Massachusetts Institute of Technology; ,; Cambridge MA USA"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"Massachusetts Institute of Technology; ,; Cambridge MA USA","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5049165312"],"corresponding_institution_ids":["https://openalex.org/I27804330"],"apc_list":null,"apc_paid":null,"fwci":240.3521,"has_fulltext":false,"cited_by_count":1073,"citation_normalized_percentile":{"value":0.99990643,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"165","last_page":"178"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9115642309188843},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5389876961708069},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5246836543083191},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.495251327753067},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.47089627385139465},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.4556359350681305},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.4407917559146881},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4117870628833771},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.41035541892051697},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3566783666610718},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3247753977775574},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.25389426946640015},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16689527034759521}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9115642309188843},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5389876961708069},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5246836543083191},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.495251327753067},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.47089627385139465},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.4556359350681305},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.4407917559146881},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4117870628833771},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.41035541892051697},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3566783666610718},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3247753977775574},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.25389426946640015},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16689527034759521},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1559845.1559865","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1559845.1559865","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2009 ACM SIGMOD International Conference on Management of data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.44999998807907104,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W299321315","https://openalex.org/W1483819513","https://openalex.org/W1560473542","https://openalex.org/W1571376639","https://openalex.org/W2051244613","https://openalex.org/W2061587502","https://openalex.org/W2088675571","https://openalex.org/W2098935637","https://openalex.org/W2100830825","https://openalex.org/W2119400430","https://openalex.org/W2119565742","https://openalex.org/W2128912667","https://openalex.org/W2133473621","https://openalex.org/W2146635036","https://openalex.org/W2173213060","https://openalex.org/W2200275386","https://openalex.org/W2987597094","https://openalex.org/W6610565695","https://openalex.org/W6679046037"],"related_works":["https://openalex.org/W2526069705","https://openalex.org/W2024016913","https://openalex.org/W2773471004","https://openalex.org/W2909639320","https://openalex.org/W2898398754","https://openalex.org/W3090309567","https://openalex.org/W88591960","https://openalex.org/W4241706480","https://openalex.org/W2035419609","https://openalex.org/W947442053"],"abstract_inverted_index":{"There":[0],"is":[1],"currently":[2],"considerable":[3],"enthusiasm":[4],"around":[5],"the":[6,16,127,135,144,147,159,162],"MapReduce":[7],"(MR)":[8],"paradigm":[9],"for":[10,32,109],"large-scale":[11],"data":[12,131],"analysis":[13],"[17].":[14],"Although":[15,126],"basic":[17],"control":[18],"flow":[19],"of":[20,61,65,78,81,92,112,117,137,150,161,178],"this":[21,48,71],"framework":[22],"has":[23],"existed":[24],"in":[25,63],"parallel":[26,99,138],"SQL":[27],"database":[28],"management":[29],"systems":[30,62,172],"(DBMS)":[31],"over":[33],"20":[34],"years,":[35],"some":[36,123],"have":[37,85],"called":[38],"MR":[39,93,145],"a":[40,75,79,115],"dramatically":[41],"new":[42],"computing":[43],"model":[44],"[8,":[45],"17].":[46],"In":[47],"paper,":[49],"we":[50,57,73,84,104],"describe":[51],"and":[52,67,133,166],"compare":[53],"both":[54,59,176],"paradigms.":[55],"Furthermore,":[56],"evaluate":[58],"kinds":[60,177],"terms":[64],"performance":[66,108,149,164],"development":[68],"complexity.":[69],"To":[70],"end,":[72],"define":[74],"benchmark":[76],"consisting":[77],"collection":[80],"tasks":[82],"that":[83,170],"run":[86],"on":[87,97,114],"an":[88],"open":[89],"source":[90],"version":[91],"as":[94,96],"well":[95],"two":[98],"DBMSs.":[100],"For":[101],"each":[102,106],"task,":[103],"measure":[105],"system's":[107],"various":[110],"degrees":[111],"parallelism":[113],"cluster":[116],"100":[118],"nodes.":[119],"Our":[120],"results":[121],"reveal":[122],"interesting":[124],"trade-offs.":[125],"process":[128],"to":[129],"load":[130],"into":[132],"tune":[134],"execution":[136],"DBMSs":[139,152],"took":[140],"much":[141],"longer":[142],"than":[143],"system,":[146],"observed":[148],"these":[151],"was":[153],"strikingly":[154],"better.":[155],"We":[156],"speculate":[157],"about":[158],"causes":[160],"dramatic":[163],"difference":[165],"consider":[167],"implementation":[168],"concepts":[169],"future":[171],"should":[173],"take":[174],"from":[175],"architectures.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":14},{"year":2020,"cited_by_count":38},{"year":2019,"cited_by_count":41},{"year":2018,"cited_by_count":69},{"year":2017,"cited_by_count":75},{"year":2016,"cited_by_count":99},{"year":2015,"cited_by_count":113},{"year":2014,"cited_by_count":152},{"year":2013,"cited_by_count":129},{"year":2012,"cited_by_count":121}],"updated_date":"2026-03-28T08:17:26.163206","created_date":"2025-10-10T00:00:00"}
