{"id":"https://openalex.org/W2161692763","doi":"https://doi.org/10.1145/2588555.2612185","title":"Parallel data analysis directly on scientific file formats","display_name":"Parallel data analysis directly on scientific file formats","publication_year":2014,"publication_date":"2014-06-18","ids":{"openalex":"https://openalex.org/W2161692763","doi":"https://doi.org/10.1145/2588555.2612185","mag":"2161692763"},"language":"en","primary_location":{"id":"doi:10.1145/2588555.2612185","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2588555.2612185","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001035930","display_name":"Spyros Blanas","orcid":"https://orcid.org/0009-0004-2703-7177"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Spyros Blanas","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043129695","display_name":"Kesheng Wu","orcid":"https://orcid.org/0000-0002-6907-3393"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kesheng Wu","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062233562","display_name":"Suren Byna","orcid":"https://orcid.org/0000-0003-3048-3448"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Surendra Byna","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101687868","display_name":"Bin Dong","orcid":"https://orcid.org/0000-0002-0725-0833"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bin Dong","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109133252","display_name":"Arie Shoshani","orcid":null},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arie Shoshani","raw_affiliation_strings":["Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5001035930"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":11.5113,"has_fulltext":false,"cited_by_count":92,"citation_normalized_percentile":{"value":0.98675761,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"385","last_page":"396"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8597114086151123},{"id":"https://openalex.org/keywords/netcdf","display_name":"NetCDF","score":0.8543529510498047},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7614569067955017},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.5859487056732178},{"id":"https://openalex.org/keywords/relational-database-management-system","display_name":"Relational database management system","score":0.5450856685638428},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5250056982040405},{"id":"https://openalex.org/keywords/data-management","display_name":"Data management","score":0.46781158447265625},{"id":"https://openalex.org/keywords/file-format","display_name":"File format","score":0.4309979975223541},{"id":"https://openalex.org/keywords/file-system","display_name":"File system","score":0.43022122979164124},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.4183007478713989},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36369678378105164},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.19119831919670105},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16099315881729126}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8597114086151123},{"id":"https://openalex.org/C11090531","wikidata":"https://www.wikidata.org/wiki/Q1361922","display_name":"NetCDF","level":2,"score":0.8543529510498047},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7614569067955017},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5859487056732178},{"id":"https://openalex.org/C24394798","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database management system","level":3,"score":0.5450856685638428},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5250056982040405},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.46781158447265625},{"id":"https://openalex.org/C97250363","wikidata":"https://www.wikidata.org/wiki/Q235557","display_name":"File format","level":2,"score":0.4309979975223541},{"id":"https://openalex.org/C2780940931","wikidata":"https://www.wikidata.org/wiki/Q174989","display_name":"File system","level":2,"score":0.43022122979164124},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.4183007478713989},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36369678378105164},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.19119831919670105},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16099315881729126}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2588555.2612185","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2588555.2612185","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.719.4754","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.719.4754","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://sdm.lbl.gov/%7Esbyna/research/papers/201406-SIGMOD-sds.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W597956076","https://openalex.org/W1501548698","https://openalex.org/W1521148341","https://openalex.org/W1523525632","https://openalex.org/W1532114435","https://openalex.org/W1982945449","https://openalex.org/W1992191167","https://openalex.org/W1998556518","https://openalex.org/W2003421875","https://openalex.org/W2004010764","https://openalex.org/W2014830756","https://openalex.org/W2026651123","https://openalex.org/W2041564575","https://openalex.org/W2042992924","https://openalex.org/W2043099794","https://openalex.org/W2060098952","https://openalex.org/W2068410942","https://openalex.org/W2081612620","https://openalex.org/W2083036673","https://openalex.org/W2089050662","https://openalex.org/W2092324613","https://openalex.org/W2096165224","https://openalex.org/W2103207352","https://openalex.org/W2104486653","https://openalex.org/W2107763879","https://openalex.org/W2109855559","https://openalex.org/W2117156932","https://openalex.org/W2117538598","https://openalex.org/W2121243315","https://openalex.org/W2124851765","https://openalex.org/W2126583885","https://openalex.org/W2157370893","https://openalex.org/W2159825340","https://openalex.org/W2226979724","https://openalex.org/W2324604766","https://openalex.org/W2406955896","https://openalex.org/W2924055038","https://openalex.org/W2996540471","https://openalex.org/W3098294620","https://openalex.org/W3138367763","https://openalex.org/W4206835082","https://openalex.org/W6678708722"],"related_works":["https://openalex.org/W2505630977","https://openalex.org/W4385585331","https://openalex.org/W2383709723","https://openalex.org/W1567213510","https://openalex.org/W2626533837","https://openalex.org/W2375584934","https://openalex.org/W2761120596","https://openalex.org/W1575529579","https://openalex.org/W2525788546","https://openalex.org/W1988380406"],"abstract_inverted_index":{"Scientific":[0],"experiments":[1],"and":[2,17,26,60,65,80,100,138,143,173,199,216,223,243],"large-scale":[3,197],"simulations":[4],"produce":[5],"massive":[6],"amounts":[7],"of":[8,11,58,112,155,208],"data.":[9],"Many":[10],"these":[12,46],"scientific":[13,29,94,115,274],"datasets":[14],"are":[15,18,36,42],"arrays,":[16,40],"stored":[19,126],"in":[20,44,127,168,196,254,270,272],"file":[21,130,202],"formats":[22],"such":[23,33],"as":[24,34],"HDF5":[25,129,183],"NetCDF.":[27],"Although":[28],"data":[30,62,88,95,116,125,140,184,253,260],"management":[31,89],"systems,":[32],"SciDB,":[35],"designed":[37],"to":[38,70,150,190],"manipulate":[39],"there":[41],"challenges":[43],"integrating":[45,86],"systems":[47],"into":[48,91],"existing":[49,78,93],"analysis":[50,96,117,157,275],"workflows.":[51,276],"Major":[52],"barriers":[53],"include":[54],"the":[55,67,77,110,128,136,145,151,156,162,182,192,206,233,238,259],"expensive":[56],"task":[57],"preparing":[59],"loading":[61,141,261],"before":[63],"querying,":[64],"converting":[66],"final":[68],"results":[69,147],"a":[71,84,87,113,212,220,224],"format":[72,185],"that":[73,119,237],"is":[74,98,244,267],"understood":[75],"by":[76,257],"post-processing":[79],"visualization":[81],"tools.":[82],"As":[83],"consequence,":[85],"system":[90,118,210,215,230,236,266],"an":[92],"workflow":[97],"time-consuming":[99],"requires":[101],"extensive":[102],"user":[103],"involvement.":[104],"In":[105,177],"this":[106],"paper,":[107],"we":[108],"present":[109],"design":[111,133,160],"new":[114],"efficiently":[120],"processes":[121],"queries":[122],"directly":[123],"over":[124,181],"format.":[131],"This":[132],"choice":[134],"eliminates":[135],"tedious":[137],"error-prone":[139],"process,":[142],"makes":[144],"query":[146,175,179,264],"readily":[148],"available":[149,195],"next":[152],"processing":[153,180,252,265],"steps":[154],"workflow.":[158],"Our":[159,229],"leverages":[161],"increasing":[163],"main":[164],"memory":[165],"capacities":[166],"found":[167],"supercomputers":[169,198],"through":[170],"bitmap":[171],"indexing":[172],"in-memory":[174],"execution.":[176],"addition,":[178],"can":[186],"be":[187],"effortlessly":[188],"parallelized":[189],"utilize":[191],"ample":[193],"concurrency":[194],"modern":[200],"parallel":[201],"systems.":[203],"We":[204],"evaluate":[205],"performance":[207],"our":[209,263],"on":[211],"large":[213],"supercomputing":[214],"experiment":[217],"with":[218],"both":[219],"synthetic":[221],"dataset":[222],"real":[225],"cosmology":[226,239],"observation":[227],"dataset.":[228],"frequently":[231],"outperforms":[232],"relational":[234],"database":[235],"team":[240],"currently":[241],"uses,":[242],"more":[245,268],"than":[246,249],"10X":[247],"faster":[248],"Hive":[250],"when":[251],"parallel.":[255],"Overall,":[256],"eliminating":[258],"step,":[262],"effective":[269],"supporting":[271],"situ":[273]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":13},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":2}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
