{"id":"https://openalex.org/W4211043198","doi":"https://doi.org/10.1371/journal.pcbi.1009757","title":"Ten simple rules for large-scale data processing","display_name":"Ten simple rules for large-scale data processing","publication_year":2022,"publication_date":"2022-02-10","ids":{"openalex":"https://openalex.org/W4211043198","doi":"https://doi.org/10.1371/journal.pcbi.1009757","pmid":"https://pubmed.ncbi.nlm.nih.gov/35143491"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.1009757","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1009757","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1009757&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"type":"editorial","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1009757&type=printable","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052377950","display_name":"Arkarachai Fungtammasan","orcid":"https://orcid.org/0000-0003-2398-0358"},"institutions":[{"id":"https://openalex.org/I4210130836","display_name":"DNAnexus (United States)","ror":"https://ror.org/043pjwk57","country_code":"US","type":"company","lineage":["https://openalex.org/I4210130836"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Arkarachai Fungtammasan","raw_affiliation_strings":["DNAnexus, Inc., Mountain View, California, United States of America"],"affiliations":[{"raw_affiliation_string":"DNAnexus, Inc., Mountain View, California, United States of America","institution_ids":["https://openalex.org/I4210130836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020020080","display_name":"Alexandra Lee","orcid":"https://orcid.org/0000-0002-0208-3730"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexandra Lee","raw_affiliation_strings":["Genomics and Computational Biology Graduate Program, University of Pennsylvania, Philadelphia, Pennsylvania, United States of America"],"affiliations":[{"raw_affiliation_string":"Genomics and Computational Biology Graduate Program, University of Pennsylvania, Philadelphia, Pennsylvania, United States of America","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069189563","display_name":"Jaclyn Taroni","orcid":"https://orcid.org/0000-0003-4734-4508"},"institutions":[{"id":"https://openalex.org/I4210126766","display_name":"Alex's Lemonade Stand Foundation","ror":"https://ror.org/038ja4880","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210126766"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaclyn Taroni","raw_affiliation_strings":["Childhood Cancer Data Lab, Alex's Lemonade Stand Foundation, Philadelphia, Pennsylvania, United States of America"],"affiliations":[{"raw_affiliation_string":"Childhood Cancer Data Lab, Alex's Lemonade Stand Foundation, Philadelphia, Pennsylvania, United States of America","institution_ids":["https://openalex.org/I4210126766"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017381430","display_name":"K. Wheeler","orcid":"https://orcid.org/0000-0002-0640-2903"},"institutions":[{"id":"https://openalex.org/I4210126766","display_name":"Alex's Lemonade Stand Foundation","ror":"https://ror.org/038ja4880","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210126766"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kurt Wheeler","raw_affiliation_strings":["Childhood Cancer Data Lab, Alex's Lemonade Stand Foundation, Philadelphia, Pennsylvania, United States of America"],"affiliations":[{"raw_affiliation_string":"Childhood Cancer Data Lab, Alex's Lemonade Stand Foundation, Philadelphia, Pennsylvania, United States of America","institution_ids":["https://openalex.org/I4210126766"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000764422","display_name":"Chen-Shan Chin","orcid":"https://orcid.org/0000-0003-4394-2455"},"institutions":[{"id":"https://openalex.org/I4210130836","display_name":"DNAnexus (United States)","ror":"https://ror.org/043pjwk57","country_code":"US","type":"company","lineage":["https://openalex.org/I4210130836"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chen-Shan Chin","raw_affiliation_strings":["DNAnexus, Inc., Mountain View, California, United States of America"],"affiliations":[{"raw_affiliation_string":"DNAnexus, Inc., Mountain View, California, United States of America","institution_ids":["https://openalex.org/I4210130836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005450827","display_name":"Sean Davis","orcid":"https://orcid.org/0000-0002-8991-6458"},"institutions":[{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sean Davis","raw_affiliation_strings":["Center for Health AI, University of Colorado Anschutz School of Medicine, Aurora, Colorado, United States of America","Department of Medicine, Divisions of Medical Oncology and Hematology, University of Colorado Anschutz School of Medicine, Aurora, Colorado, United States of America"],"affiliations":[{"raw_affiliation_string":"Center for Health AI, University of Colorado Anschutz School of Medicine, Aurora, Colorado, United States of America","institution_ids":["https://openalex.org/I51713134"]},{"raw_affiliation_string":"Department of Medicine, Divisions of Medical Oncology and Hematology, University of Colorado Anschutz School of Medicine, Aurora, Colorado, United States of America","institution_ids":["https://openalex.org/I51713134"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050692257","display_name":"Casey S. Greene","orcid":"https://orcid.org/0000-0001-8713-9213"},"institutions":[{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Casey Greene","raw_affiliation_strings":["Center for Health AI, University of Colorado Anschutz School of Medicine, Aurora, Colorado, United States of America","Department of Biochemistry and Molecular Genetics, University of Colorado Anschutz School of Medicine, Aurora, Colorado, United States of America"],"affiliations":[{"raw_affiliation_string":"Center for Health AI, University of Colorado Anschutz School of Medicine, Aurora, Colorado, United States of America","institution_ids":["https://openalex.org/I51713134"]},{"raw_affiliation_string":"Department of Biochemistry and Molecular Genetics, University of Colorado Anschutz School of Medicine, Aurora, Colorado, United States of America","institution_ids":["https://openalex.org/I51713134"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5000764422","https://openalex.org/A5005450827","https://openalex.org/A5050692257","https://openalex.org/A5052377950"],"corresponding_institution_ids":["https://openalex.org/I4210130836","https://openalex.org/I51713134"],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":3.2519,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.93513056,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"18","issue":"2","first_page":"e1009757","last_page":"e1009757"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.6874185800552368},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5667689442634583},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5308120250701904},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33537954092025757},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.32220640778541565},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.11405503749847412},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.10731399059295654}],"concepts":[{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.6874185800552368},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5667689442634583},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5308120250701904},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33537954092025757},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32220640778541565},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.11405503749847412},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.10731399059295654},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D017408","descriptor_name":"Guidelines as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017408","descriptor_name":"Guidelines as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017408","descriptor_name":"Guidelines as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":4,"locations":[{"id":"doi:10.1371/journal.pcbi.1009757","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1009757","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1009757&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},{"id":"pmid:35143491","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35143491","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS computational biology","raw_type":null},{"id":"pmh:oai:doaj.org/article:baa53f335bf04bcaa44a67b2ac7e964a","is_oa":true,"landing_page_url":"https://doaj.org/article/baa53f335bf04bcaa44a67b2ac7e964a","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Computational Biology, Vol 18, Iss 2, p e1009757 (2022)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:8830682","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8830682","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Comput Biol","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.1009757","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1009757","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1009757&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1729374867","display_name":null,"funder_award_id":"R01 HG010067","funder_id":"https://openalex.org/F4320337348","funder_display_name":"National Human Genome Research Institute"},{"id":"https://openalex.org/G2769988617","display_name":null,"funder_award_id":"GBMF 4552","funder_id":"https://openalex.org/F4320306202","funder_display_name":"Gordon and Betty Moore Foundation"},{"id":"https://openalex.org/G516917286","display_name":null,"funder_award_id":"HG010067","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G899795625","display_name":null,"funder_award_id":"R01 HG010067","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320306202","display_name":"Gordon and Betty Moore Foundation","ror":"https://ror.org/006wxqw41"},{"id":"https://openalex.org/F4320306589","display_name":"Alex's Lemonade Stand Foundation for Childhood Cancer","ror":"https://ror.org/038ja4880"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337348","display_name":"National Human Genome Research Institute","ror":"https://ror.org/00baak391"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4211043198.pdf","grobid_xml":"https://content.openalex.org/works/W4211043198.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W2026532911","https://openalex.org/W2036318837","https://openalex.org/W2106031730","https://openalex.org/W2110417468","https://openalex.org/W2114843025","https://openalex.org/W2539403003","https://openalex.org/W2598912916","https://openalex.org/W2605897695","https://openalex.org/W2606715885","https://openalex.org/W2901027836","https://openalex.org/W2953067954","https://openalex.org/W3087789644","https://openalex.org/W3104000916","https://openalex.org/W3165462637","https://openalex.org/W3216633781","https://openalex.org/W4292779060","https://openalex.org/W6778883912"],"related_works":["https://openalex.org/W4238075012","https://openalex.org/W2347219288","https://openalex.org/W2366221835","https://openalex.org/W2022005319","https://openalex.org/W2358687537","https://openalex.org/W2608950002","https://openalex.org/W4281832968","https://openalex.org/W4289786569","https://openalex.org/W1984735105","https://openalex.org/W2566167881"],"abstract_inverted_index":{"Exabytes":[0],"of":[1,25,33,85,92,106,118],"images,":[2],"sequences,":[3],"tabular":[4],"data,":[5,119],"and":[6,45,67],"unstructured":[7],"data":[8,18,51,68,80,127],"are":[9],"now":[10],"available":[11],"for":[12,96,129],"analysis":[13,52,75,115],"to":[14,21,60,124],"advance":[15],"science.":[16],"These":[17],"support":[19],"efforts":[20],"visualize":[22],"the":[23,26,31,79,83,90,104,107,112,130],"image":[24],"black":[27],"hole":[28],"[1],":[29],"characterize":[30],"spectrum":[32],"mutations":[34],"across":[35],"cancer":[36],"types":[37],"[2],":[38],"create":[39],"a":[40,50,62],"sophisticated":[41],"language":[42],"model":[43],"[3],":[44],"many":[46],"other":[47],"tasks.":[48],"Defining":[49],"as":[53,65],"large":[54,76],"scale":[55,77],"or":[56,88],"not":[57],"is":[58],"likely":[59],"be":[61,125],"moving":[63],"target":[64],"computing":[66,98],"transfer":[69],"technologies":[70],"advance.":[71],"We":[72],"consider":[73,122],"an":[74],"when":[78,89],"size":[81],"exceeds":[82],"capacity":[84,101],"local":[86],"resources":[87],"amount":[91],"time":[93],"spent":[94],"waiting":[95],"high-performance":[97],"(HPC)":[99],"compute":[100],"would":[102],"disrupt":[103],"pace":[105],"research":[108],"project.":[109],"For":[110],"example,":[111],"recount2":[113],"[4]":[114],"processed":[116],"petabytes":[117],"so":[120],"we":[121],"it":[123],"large-scale":[126],"processing":[128],"current":[131],"day":[132],"standard.":[133]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
