{"id":"https://openalex.org/W1868798185","doi":"https://doi.org/10.1109/bigdata.2015.7363840","title":"Scientific computing meets big data technology: An astronomy use case","display_name":"Scientific computing meets big data technology: An astronomy use case","publication_year":2015,"publication_date":"2015-10-01","ids":{"openalex":"https://openalex.org/W1868798185","doi":"https://doi.org/10.1109/bigdata.2015.7363840","mag":"1868798185"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2015.7363840","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363840","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100423006","display_name":"Zhao Zhang","orcid":"https://orcid.org/0000-0002-0862-1093"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhao Zhang","raw_affiliation_strings":["Berkeley Institute for Data Science, University of California, Berkeley","AMPLab, University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"Berkeley Institute for Data Science, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"AMPLab, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048639381","display_name":"K. Barbary","orcid":"https://orcid.org/0000-0002-2532-3696"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyle Barbary","raw_affiliation_strings":["Berkeley Center for Cosmological Physics, University of California, Berkeley","Berkeley Institute for Data Science, University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"Berkeley Center for Cosmological Physics, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Berkeley Institute for Data Science, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008077599","display_name":"Frank Austin Nothaft","orcid":"https://orcid.org/0000-0002-7729-7055"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frank Austin Nothaft","raw_affiliation_strings":["ASPIRE Lab, University of California, Berkeley","AMPLab, University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"ASPIRE Lab, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"AMPLab, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042005594","display_name":"Evan Sparks","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Evan Sparks","raw_affiliation_strings":["AMPLab, University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"AMPLab, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109251988","display_name":"O. Zahn","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Oliver Zahn","raw_affiliation_strings":["Berkeley Center for Cosmological Physics, University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"Berkeley Center for Cosmological Physics, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102019638","display_name":"Michael J. Franklin","orcid":"https://orcid.org/0000-0003-3332-8574"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael J. Franklin","raw_affiliation_strings":["Berkeley Institute for Data Science, University of California, Berkeley","AMPLab, University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"Berkeley Institute for Data Science, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"AMPLab, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077202069","display_name":"David A. Patterson","orcid":"https://orcid.org/0000-0003-0902-7093"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David A. Patterson","raw_affiliation_strings":["ASPIRE Lab, University of California, Berkeley","AMPLab, University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"ASPIRE Lab, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"AMPLab, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030135192","display_name":"S. Perlmutter","orcid":"https://orcid.org/0000-0002-4436-4661"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Saul Perlmutter","raw_affiliation_strings":["Berkeley Center for Cosmological Physics, University of California, Berkeley","Berkeley Institute for Data Science, University of California, Berkeley"],"affiliations":[{"raw_affiliation_string":"Berkeley Center for Cosmological Physics, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Berkeley Institute for Data Science, University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100423006"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":17.2913,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.98934314,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"918","last_page":"927"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7872873544692993},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.7284203767776489},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.6479406952857971},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6422162055969238},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.6182016730308533},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5508471131324768},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5491729378700256},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5244117975234985},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3789834976196289},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12459757924079895}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7872873544692993},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.7284203767776489},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6479406952857971},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6422162055969238},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.6182016730308533},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5508471131324768},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5491729378700256},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5244117975234985},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3789834976196289},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12459757924079895}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata.2015.7363840","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363840","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:escholarship.org/ark:/13030/qt0td139f0","is_oa":false,"landing_page_url":"https://escholarship.org/uc/item/0td139f0","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W26797728","https://openalex.org/W1493893823","https://openalex.org/W1798241237","https://openalex.org/W1978924650","https://openalex.org/W1989017925","https://openalex.org/W1991001523","https://openalex.org/W2006887358","https://openalex.org/W2013455564","https://openalex.org/W2055043387","https://openalex.org/W2081612620","https://openalex.org/W2083055186","https://openalex.org/W2096125134","https://openalex.org/W2096812688","https://openalex.org/W2102130607","https://openalex.org/W2119738171","https://openalex.org/W2122465391","https://openalex.org/W2131229759","https://openalex.org/W2131975293","https://openalex.org/W2140332639","https://openalex.org/W2142238829","https://openalex.org/W2157917411","https://openalex.org/W2189465200","https://openalex.org/W2319310062","https://openalex.org/W2325507490","https://openalex.org/W2406836379","https://openalex.org/W2990075684","https://openalex.org/W3104062568","https://openalex.org/W3125398447","https://openalex.org/W3139073351","https://openalex.org/W4234578858","https://openalex.org/W4236236547","https://openalex.org/W6629380029","https://openalex.org/W6679815717","https://openalex.org/W6687322159","https://openalex.org/W6699821053","https://openalex.org/W6701072765","https://openalex.org/W6713707562","https://openalex.org/W6770669002","https://openalex.org/W6785671740","https://openalex.org/W6845778248"],"related_works":["https://openalex.org/W2998381397","https://openalex.org/W4236419692","https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2187181201","https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W4388692845","https://openalex.org/W3202731209","https://openalex.org/W3211874991"],"abstract_inverted_index":{"Scientific":[0],"analyses":[1],"commonly":[2],"compose":[3],"multiple":[4],"single-process":[5,14],"programs":[6,15],"into":[7],"a":[8,19,48,61,80,123,134,188],"dataflow.":[9],"An":[10],"end-to-end":[11],"dataflow":[12,102],"of":[13,108],"is":[16],"known":[17],"as":[18,93,184],"many-task":[20,56,192],"application.":[21],"Typically,":[22],"tools":[23],"from":[24],"the":[25,75,94,99,113,141,164,169],"HPC":[26],"software":[27,151],"stack":[28],"are":[29,187],"used":[30],"to":[31,54,78,163],"parallelize":[32,55],"these":[33],"analyses.":[34],"In":[35],"this":[36],"work,":[37],"we":[38,97,146],"investigate":[39],"an":[40,128],"alternate":[41],"approach":[42],"that":[43,148,178],"uses":[44],"Apache":[45,70,109,185],"Spark":[46,110,186],"-":[47,53],"modern":[49],"big":[50,155],"data":[51,118,156],"platform":[52],"applications.":[57,194],"We":[58,72],"present":[59],"Kira,":[60],"flexible":[62],"and":[63,106],"distributed":[64],"astronomy":[65,85],"image":[66],"processing":[67],"toolkit":[68,77],"using":[69,137],"Spark.":[71],"then":[73],"use":[74,95],"Kira":[76,88,91,120,158,176],"implement":[79],"Source":[81],"Extractor":[82],"application":[83],"for":[84,154,191],"images,":[86],"called":[87],"SE.":[89],"With":[90],"SE":[92,121,159],"case,":[96],"study":[98],"programming":[100],"flexibility,":[101],"richness,":[103],"scheduling":[104],"capacity":[105],"performance":[107,162],"running":[111,167],"on":[112,140,168],"EC2":[114,143],"cloud.":[115,144],"By":[116],"exploiting":[117],"locality,":[119],"achieves":[122,160],"3.7":[124],"\u03c7":[125],"speedup":[126],"over":[127],"equivalent":[129],"C":[130,165],"program":[131],"when":[132],"analyzing":[133],"1TB":[135],"dataset":[136],"512":[138],"cores":[139],"Amazon":[142],"Furthermore,":[145],"show":[147],"by":[149],"leveraging":[150],"originally":[152],"designed":[153],"infrastructure,":[157],"competitive":[161],"implementation":[166],"NERSC":[170],"Edison":[171],"supercomputer.":[172],"Our":[173],"experience":[174],"with":[175],"indicates":[177],"emerging":[179],"Big":[180],"Data":[181],"platforms":[182],"such":[183],"performant":[189],"alternative":[190],"scientific":[193]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":10}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
