{"id":"https://openalex.org/W3004799631","doi":"https://doi.org/10.1155/2020/6364752","title":"Query Execution Optimization in Spark SQL","display_name":"Query Execution Optimization in Spark SQL","publication_year":2020,"publication_date":"2020-02-07","ids":{"openalex":"https://openalex.org/W3004799631","doi":"https://doi.org/10.1155/2020/6364752","mag":"3004799631"},"language":"en","primary_location":{"id":"doi:10.1155/2020/6364752","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2020/6364752","pdf_url":"https://downloads.hindawi.com/journals/sp/2020/6364752.pdf","source":{"id":"https://openalex.org/S166774750","display_name":"Scientific Programming","issn_l":"1058-9244","issn":["1058-9244","1875-919X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Scientific Programming","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://downloads.hindawi.com/journals/sp/2020/6364752.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011713058","display_name":"Xuechun Ji","orcid":"https://orcid.org/0000-0003-3654-0204"},"institutions":[{"id":"https://openalex.org/I4210118629","display_name":"NARI Group (China)","ror":"https://ror.org/02egn3136","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210118629"]},{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuechun Ji","raw_affiliation_strings":["NARI Research Institute NARI Technology, Nanjing, China","School of Computer Science and Engineering, Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-3654-0204","affiliations":[{"raw_affiliation_string":"NARI Research Institute NARI Technology, Nanjing, China","institution_ids":["https://openalex.org/I4210118629"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034989205","display_name":"Maoxian Zhao","orcid":"https://orcid.org/0000-0002-9075-6682"},"institutions":[{"id":"https://openalex.org/I80143920","display_name":"Shandong University of Science and Technology","ror":"https://ror.org/04gtjhw98","country_code":"CN","type":"education","lineage":["https://openalex.org/I80143920"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Maoxian Zhao","raw_affiliation_strings":["College of Mathematics and Systems Science, Shandong University of Science and Technology, Qingdao, China"],"raw_orcid":"https://orcid.org/0000-0002-9075-6682","affiliations":[{"raw_affiliation_string":"College of Mathematics and Systems Science, Shandong University of Science and Technology, Qingdao, China","institution_ids":["https://openalex.org/I80143920"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042336611","display_name":"Mingyu Zhai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118629","display_name":"NARI Group (China)","ror":"https://ror.org/02egn3136","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210118629"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyu Zhai","raw_affiliation_strings":["NARI Research Institute NARI Technology, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NARI Research Institute NARI Technology, Nanjing, China","institution_ids":["https://openalex.org/I4210118629"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037599201","display_name":"Qingxi Wu","orcid":"https://orcid.org/0000-0003-4716-4144"},"institutions":[{"id":"https://openalex.org/I4210118629","display_name":"NARI Group (China)","ror":"https://ror.org/02egn3136","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210118629"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingxi Wu","raw_affiliation_strings":["NARI Research Institute NARI Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-4716-4144","affiliations":[{"raw_affiliation_string":"NARI Research Institute NARI Technology, Nanjing, China","institution_ids":["https://openalex.org/I4210118629"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5034989205"],"corresponding_institution_ids":["https://openalex.org/I80143920"],"apc_list":{"value":1800,"currency":"USD","value_usd":1800},"apc_paid":{"value":1800,"currency":"USD","value_usd":1800},"fwci":0.8351,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.79771461,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"2020","issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8822154998779297},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.7643036842346191},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.7502589225769043},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.5791307687759399},{"id":"https://openalex.org/keywords/query-by-example","display_name":"Query by Example","score":0.525295078754425},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.48294585943222046},{"id":"https://openalex.org/keywords/sargable","display_name":"Sargable","score":0.4495556950569153},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.44928810000419617},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.4398260712623596},{"id":"https://openalex.org/keywords/stored-procedure","display_name":"Stored procedure","score":0.42115283012390137},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.34888148307800293},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.21803003549575806},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14111489057540894},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.12471219897270203},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.10154959559440613}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8822154998779297},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.7643036842346191},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.7502589225769043},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.5791307687759399},{"id":"https://openalex.org/C194222762","wikidata":"https://www.wikidata.org/wiki/Q114486","display_name":"Query by Example","level":4,"score":0.525295078754425},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.48294585943222046},{"id":"https://openalex.org/C192939062","wikidata":"https://www.wikidata.org/wiki/Q104840822","display_name":"Sargable","level":4,"score":0.4495556950569153},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.44928810000419617},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4398260712623596},{"id":"https://openalex.org/C154420247","wikidata":"https://www.wikidata.org/wiki/Q846619","display_name":"Stored procedure","level":5,"score":0.42115283012390137},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34888148307800293},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.21803003549575806},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14111489057540894},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.12471219897270203},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.10154959559440613}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1155/2020/6364752","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2020/6364752","pdf_url":"https://downloads.hindawi.com/journals/sp/2020/6364752.pdf","source":{"id":"https://openalex.org/S166774750","display_name":"Scientific Programming","issn_l":"1058-9244","issn":["1058-9244","1875-919X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Scientific Programming","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:0a4b0404ccc747d88931f325dbcc1a10","is_oa":true,"landing_page_url":"https://doaj.org/article/0a4b0404ccc747d88931f325dbcc1a10","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Scientific Programming, Vol 2020 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1155/2020/6364752","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2020/6364752","pdf_url":"https://downloads.hindawi.com/journals/sp/2020/6364752.pdf","source":{"id":"https://openalex.org/S166774750","display_name":"Scientific Programming","issn_l":"1058-9244","issn":["1058-9244","1875-919X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Scientific Programming","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7699999809265137,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320326707","display_name":"State Grid Corporation of China","ror":"https://ror.org/05twwhs70"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3004799631.pdf","grobid_xml":"https://content.openalex.org/works/W3004799631.grobid-xml"},"referenced_works_count":3,"referenced_works":["https://openalex.org/W2110086534","https://openalex.org/W2138474206","https://openalex.org/W2243803726"],"related_works":["https://openalex.org/W2955368753","https://openalex.org/W2391725132","https://openalex.org/W2384284661","https://openalex.org/W319014924","https://openalex.org/W2588167896","https://openalex.org/W1987288934","https://openalex.org/W2389888740","https://openalex.org/W2362460270","https://openalex.org/W2188347403","https://openalex.org/W2550810496"],"abstract_inverted_index":{"Spark":[0,20,39,63,114,152,168],"SQL":[1,115,143,169],"is":[2],"a":[3,120],"big":[4],"data":[5,10,29,52,111,172],"processing":[6],"tool":[7],"for":[8,88,99],"structured":[9],"query":[11,76,163,189],"and":[12,48,60,133,146,196],"analysis.":[13],"However,":[14],"due":[15],"to":[16,26,30,65,105,154],"the":[17,31,35,43,56,61,67,75,83,92,106,109,113,129,140,150,156,162,166,182,188,192,201],"execution":[18,36],"of":[19,38,69,85,108,131,200],"SQL,":[21],"there":[22],"are":[23],"multiple":[24],"times":[25],"write":[27],"intermediate":[28,51,110],"disk,":[32],"which":[33,125],"reduces":[34],"efficiency":[37],"SQL.":[40],"Targeting":[41],"on":[42],"existing":[44,167],"issues,":[45],"we":[46,79],"design":[47],"implement":[49],"an":[50],"cache":[53,86],"layer":[54,87],"between":[55],"underlying":[57],"file":[58],"system":[59,153],"upper":[62],"core":[64],"reduce":[66,128,191],"cost":[68,130,195],"random":[70],"disk":[71,193],"I/O.":[72],"By":[73],"using":[74],"pre-analysis":[77],"module,":[78],"can":[80,95,126,185],"dynamically":[81],"adjust":[82],"capacity":[84],"different":[89],"queries.":[90],"And":[91],"allocation":[93],"module":[94,145,184],"allocate":[96],"proper":[97],"memory":[98,203],"each":[100],"node":[101],"in":[102,112],"cluster.":[103],"According":[104],"sharing":[107],"workflow,":[116],"this":[117],"paper":[118,138,160],"proposes":[119],"cost-based":[121],"correlation":[122],"merging":[123],"algorithm,":[124],"effectively":[127,186],"reading":[132],"writing":[134],"redundant":[135],"data.":[136],"This":[137,159],"develops":[139],"SSO":[141,183],"(Spark":[142],"Optimizer)":[144],"integrates":[147],"it":[148],"into":[149],"original":[151],"achieve":[155],"above":[157],"functions.":[158],"compares":[161],"performance":[164],"with":[165],"by":[170,174],"experiment":[171],"generated":[173],"TPC-H":[175],"tool.":[176],"The":[177],"experimental":[178],"results":[179],"show":[180],"that":[181],"improve":[187],"efficiency,":[190],"I/O":[194],"make":[197],"full":[198],"use":[199],"cluster":[202],"resources.":[204]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
