{"id":"https://openalex.org/W2037712729","doi":"https://doi.org/10.1145/1645164.1645176","title":"Kepler + Hadoop","display_name":"Kepler + Hadoop","publication_year":2009,"publication_date":"2009-11-16","ids":{"openalex":"https://openalex.org/W2037712729","doi":"https://doi.org/10.1145/1645164.1645176","mag":"2037712729"},"language":"en","primary_location":{"id":"doi:10.1145/1645164.1645176","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1645164.1645176","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Workflows in Support of Large-Scale Science","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://mdsoar.org/bitstreams/ab84436c-6b95-4d8a-9315-6c15f7dbf02b/download","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101750217","display_name":"Jianwu Wang","orcid":"https://orcid.org/0000-0002-9933-1170"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jianwu Wang","raw_affiliation_strings":["University of California, San Diego, La Jolla, CA"],"affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, CA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002534857","display_name":"Daniel Crawl","orcid":"https://orcid.org/0000-0003-1013-8241"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Crawl","raw_affiliation_strings":["University of California, San Diego, La Jolla, CA"],"affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, CA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028701796","display_name":"\u0130lkay Alt\u0131nta\u015f","orcid":"https://orcid.org/0000-0002-2196-0305"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ilkay Altintas","raw_affiliation_strings":["University of California, San Diego, La Jolla, CA"],"affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, CA","institution_ids":["https://openalex.org/I36258959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101750217"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":25.954,"has_fulltext":true,"cited_by_count":98,"citation_normalized_percentile":{"value":0.99421113,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kepler","display_name":"Kepler","score":0.7883012890815735},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.648919939994812},{"id":"https://openalex.org/keywords/astrobiology","display_name":"Astrobiology","score":0.40747225284576416},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.14180737733840942},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.05510467290878296}],"concepts":[{"id":"https://openalex.org/C207963374","wikidata":"https://www.wikidata.org/wiki/Q47592","display_name":"Kepler","level":3,"score":0.7883012890815735},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.648919939994812},{"id":"https://openalex.org/C87355193","wikidata":"https://www.wikidata.org/wiki/Q411","display_name":"Astrobiology","level":1,"score":0.40747225284576416},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.14180737733840942},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.05510467290878296},{"id":"https://openalex.org/C150846664","wikidata":"https://www.wikidata.org/wiki/Q7602306","display_name":"Stars","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1645164.1645176","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1645164.1645176","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Workflows in Support of Large-Scale Science","raw_type":"proceedings-article"},{"id":"pmh:oai:mdsoar.org:11603/31656","is_oa":true,"landing_page_url":"http://hdl.handle.net/11603/31656","pdf_url":"https://mdsoar.org/bitstreams/ab84436c-6b95-4d8a-9315-6c15f7dbf02b/download","source":{"id":"https://openalex.org/S4306402556","display_name":"Maryland Shared Open Access Repository (USMAI Consortium)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"doi:10.13016/m2joeu-42lu","is_oa":true,"landing_page_url":"https://doi.org/10.13016/m2joeu-42lu","pdf_url":null,"source":{"id":"https://openalex.org/S4306402644","display_name":"Digital Repository at the University of Maryland (University of Maryland College Park)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66946132","host_organization_name":"University of Maryland, College Park","host_organization_lineage":["https://openalex.org/I66946132"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:mdsoar.org:11603/31656","is_oa":true,"landing_page_url":"http://hdl.handle.net/11603/31656","pdf_url":"https://mdsoar.org/bitstreams/ab84436c-6b95-4d8a-9315-6c15f7dbf02b/download","source":{"id":"https://openalex.org/S4306402556","display_name":"Maryland Shared Open Access Repository (USMAI Consortium)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4699999988079071}],"awards":[{"id":"https://openalex.org/G2978036855","display_name":null,"funder_award_id":"OCI-0722079","funder_id":"https://openalex.org/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https://openalex.org/G373441930","display_name":null,"funder_award_id":"DBI 0619060","funder_id":"https://openalex.org/F4320337398","funder_display_name":"Division of Biological Infrastructure"},{"id":"https://openalex.org/G5238406678","display_name":null,"funder_award_id":"DE-FC02-07ER25811","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320337377","display_name":"Office of Advanced Cyberinfrastructure","ror":"https://ror.org/04nh1dc89"},{"id":"https://openalex.org/F4320337398","display_name":"Division of Biological Infrastructure","ror":"https://ror.org/04qn9mx93"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2037712729.pdf","grobid_xml":"https://content.openalex.org/works/W2037712729.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W126044612","https://openalex.org/W1578100948","https://openalex.org/W1780933788","https://openalex.org/W1983833794","https://openalex.org/W2013221213","https://openalex.org/W2019447492","https://openalex.org/W2059960457","https://openalex.org/W2064238511","https://openalex.org/W2100928478","https://openalex.org/W2102130607","https://openalex.org/W2121762798","https://openalex.org/W2122465391","https://openalex.org/W2146917903","https://openalex.org/W2159394607","https://openalex.org/W2159528712","https://openalex.org/W2166630248","https://openalex.org/W2168103533","https://openalex.org/W2168281637","https://openalex.org/W2254543411","https://openalex.org/W2624304035","https://openalex.org/W4254234063"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2577502644","https://openalex.org/W2734892257","https://openalex.org/W4248372522","https://openalex.org/W2576891438","https://openalex.org/W2024447132","https://openalex.org/W2940411870","https://openalex.org/W4240411503","https://openalex.org/W3212579243","https://openalex.org/W2094158195"],"abstract_inverted_index":{"MapReduce":[0,14,90,134],"provides":[1],"a":[2,146,162],"parallel":[3,25],"and":[4,11,15,36,40,53,70,88,111,114,139],"scalable":[5],"programming":[6],"model":[7],"for":[8],"data-intensive":[9],"business":[10],"scientific":[12,45,64,94,103],"applications.":[13],"its":[16],"de":[17],"facto":[18],"open":[19],"source":[20],"project,":[21],"called":[22],"Hadoop,":[23],"support":[24],"processing":[26],"on":[27],"large":[28],"datasets":[29],"with":[30,76,142],"capabilities":[31],"including":[32],"automatic":[33],"data":[34,69],"partitioning":[35],"distribution,":[37],"load":[38],"balancing,":[39],"fault":[41],"tolerance":[42],"management.":[43],"Meanwhile,":[44],"workflow":[46,104,147],"management":[47,105],"systems,":[48,106],"e.g.,":[49,107],"Kepler,":[50,77,129],"Taverna,":[51],"Triana,":[52],"Pegasus,":[54],"have":[55],"demonstrated":[56],"their":[57,136],"ability":[58],"to":[59,86,119],"help":[60],"domain":[61],"scientists":[62,130],"solve":[63],"problems":[65,138],"by":[66],"synthesizing":[67],"different":[68],"computing":[71],"resources.":[72],"By":[73],"integrating":[74],"Hadoop":[75,126],"we":[78],"provide":[79],"an":[80],"easy-to-use":[81],"architecture":[82],"that":[83,99],"facilitates":[84],"users":[85],"compose":[87],"execute":[89],"applications":[91],"in":[92,128,135,145],"Kepler":[93,150],"workflows.":[95],"Our":[96],"implementation":[97],"demonstrates":[98],"many":[100],"characteristics":[101],"of":[102,121,158],"graphical":[108,151],"user":[109,152],"interface":[110],"component":[112],"reuse":[113],"sharing,":[115],"are":[116],"very":[117],"complementary":[118],"those":[120],"MapReduce.":[122],"Using":[123],"the":[124,149,156],"presented":[125],"components":[127],"can":[131],"easily":[132],"utilize":[133],"domain-specific":[137],"connect":[140],"them":[141],"other":[143],"tasks":[144],"through":[148],"interface.":[153],"We":[154],"validate":[155],"feasibility":[157],"our":[159],"approach":[160],"via":[161],"word":[163],"count":[164],"use":[165],"case.":[166]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":18},{"year":2013,"cited_by_count":14},{"year":2012,"cited_by_count":17}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2016-06-24T00:00:00"}
