{"id":"https://openalex.org/W2106625078","doi":"https://doi.org/10.14778/2732977.2733002","title":"SQL-on-Hadoop","display_name":"SQL-on-Hadoop","publication_year":2014,"publication_date":"2014-08-01","ids":{"openalex":"https://openalex.org/W2106625078","doi":"https://doi.org/10.14778/2732977.2733002","mag":"2106625078"},"language":"en","primary_location":{"id":"doi:10.14778/2732977.2733002","is_oa":false,"landing_page_url":"https://doi.org/10.14778/2732977.2733002","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020678151","display_name":"Avrilia Floratou","orcid":"https://orcid.org/0009-0007-5760-8657"},"institutions":[{"id":"https://openalex.org/I4210085935","display_name":"IBM Research - Almaden","ror":"https://ror.org/005w8dd04","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210085935","https://openalex.org/I4210114115"]},{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Avrilia Floratou","raw_affiliation_strings":["IBM Almaden Research Center","IBM Almaden Research Center,#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM Almaden Research Center","institution_ids":["https://openalex.org/I4210085935"]},{"raw_affiliation_string":"IBM Almaden Research Center,#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032253873","display_name":"Umar Farooq Minhas","orcid":"https://orcid.org/0009-0005-6520-3794"},"institutions":[{"id":"https://openalex.org/I4210085935","display_name":"IBM Research - Almaden","ror":"https://ror.org/005w8dd04","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210085935","https://openalex.org/I4210114115"]},{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Umar Farooq Minhas","raw_affiliation_strings":["IBM Almaden Research Center","IBM Almaden Research Center,#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM Almaden Research Center","institution_ids":["https://openalex.org/I4210085935"]},{"raw_affiliation_string":"IBM Almaden Research Center,#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070638387","display_name":"Fatma \u00d6zcan","orcid":"https://orcid.org/0000-0002-4418-4724"},"institutions":[{"id":"https://openalex.org/I4210085935","display_name":"IBM Research - Almaden","ror":"https://ror.org/005w8dd04","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210085935","https://openalex.org/I4210114115"]},{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fatma \u00d6zcan","raw_affiliation_strings":["IBM Almaden Research Center","IBM Almaden Research Center,#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM Almaden Research Center","institution_ids":["https://openalex.org/I4210085935"]},{"raw_affiliation_string":"IBM Almaden Research Center,#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020678151"],"corresponding_institution_ids":["https://openalex.org/I1341412227","https://openalex.org/I4210085935"],"apc_list":null,"apc_paid":null,"fwci":62.9893,"has_fulltext":false,"cited_by_count":148,"citation_normalized_percentile":{"value":0.99903493,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"7","issue":"12","first_page":"1295","last_page":"1306"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8129446506500244},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.7140438556671143},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5685084462165833},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.4962316155433655},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.45962774753570557},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.443729430437088},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.4325811266899109},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.40259671211242676}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8129446506500244},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.7140438556671143},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5685084462165833},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.4962316155433655},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.45962774753570557},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.443729430437088},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.4325811266899109},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40259671211242676},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/2732977.2733002","is_oa":false,"landing_page_url":"https://doi.org/10.14778/2732977.2733002","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1993892970","https://openalex.org/W2044490410","https://openalex.org/W2074935284","https://openalex.org/W2075345089","https://openalex.org/W2085601491","https://openalex.org/W2087177601","https://openalex.org/W2103337291","https://openalex.org/W2114854276","https://openalex.org/W2123866731","https://openalex.org/W2125775320","https://openalex.org/W2139072600","https://openalex.org/W2139445852","https://openalex.org/W2172220707","https://openalex.org/W3138367763"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4390608645","https://openalex.org/W4247566972","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497","https://openalex.org/W3158763334"],"abstract_inverted_index":{"SQL":[0,18],"query":[1],"processing":[2],"for":[3,152,179,194],"analytics":[4],"over":[5,20,63],"Hadoop":[6,27],"data":[7,69],"has":[8],"recently":[9],"gained":[10],"significant":[11],"traction.":[12],"Among":[13],"many":[14],"systems":[15,55,66,92],"providing":[16],"some":[17],"support":[19],"Hadoop,":[21],"Hive":[22,139,149,167,176],"is":[23,131,158],"the":[24,45,49,87,114,153,180,192,200],"first":[25],"native":[26],"system":[28],"that":[29,56,129],"uses":[30],"an":[31],"underlying":[32],"framework":[33],"such":[34],"as":[35],"MapReduce":[36,141,169],"or":[37],"Tez":[38,151,178],"to":[39,134,145,162],"process":[40],"SQL-like":[41],"statements.":[42],"Impala,":[43],"on":[44,140,150,168,177],"other":[46],"hand,":[47],"represents":[48],"new":[50],"emerging":[51],"class":[52],"of":[53,89,97,117,124,187,204],"SQL-on-Hadoop":[54],"exploit":[57],"a":[58,95,101,122],"shared-nothing":[59],"parallel":[60],"database":[61],"architecture":[62],"Hadoop.":[64],"Both":[65],"optimize":[67],"their":[68,118],"ingestion":[70],"via":[71],"columnar":[72,119],"storage,":[73],"and":[74,80,105,142,170,198,202],"promote":[75],"different":[76],"file":[77],"formats:":[78],"ORC":[79],"Parquet.":[81],"In":[82],"this":[83,195],"paper,":[84],"we":[85,190],"compare":[86],"performance":[88,196],"these":[90],"two":[91,106],"by":[93],"conducting":[94],"set":[96,123],"cluster":[98],"experiments":[99],"using":[100,121],"TPC-H":[102,155],"like":[103],"benchmark":[104],"TPC-DS":[107,181],"inspired":[108,182],"workloads.":[109],"We":[110],"also":[111,159],"closely":[112],"study":[113],"I/O":[115],"efficiency":[116],"formats":[120],"micro-benchmarks.":[125],"Our":[126],"results":[127],"show":[128],"Impala":[130,157],"3.3":[132],"X":[133,136,144,147,161,164,173],"4.4":[135],"faster":[137,165,174],"than":[138,148,166,175],"2.1":[143],"2.8":[146],"overall":[154],"experiments.":[156,183],"8.2":[160],"10":[163],"about":[171],"4.3":[172],"Through":[184],"detailed":[185],"analysis":[186],"experimental":[188],"results,":[189],"identify":[191],"reasons":[193],"gap":[197],"examine":[199],"strengths":[201],"limitations":[203],"each":[205],"system.":[206]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":19},{"year":2018,"cited_by_count":21},{"year":2017,"cited_by_count":25},{"year":2016,"cited_by_count":26},{"year":2015,"cited_by_count":20},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":6}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2016-06-24T00:00:00"}
