{"id":"https://openalex.org/W4236345772","doi":"https://doi.org/10.12688/f1000research.23180.1","title":"Jupyter notebook-based tools for building structured datasets from the Sequence Read Archive","display_name":"Jupyter notebook-based tools for building structured datasets from the Sequence Read Archive","publication_year":2020,"publication_date":"2020-05-19","ids":{"openalex":"https://openalex.org/W4236345772","doi":"https://doi.org/10.12688/f1000research.23180.1"},"language":"en","primary_location":{"id":"doi:10.12688/f1000research.23180.1","is_oa":true,"landing_page_url":"https://doi.org/10.12688/f1000research.23180.1","pdf_url":"https://f1000research.com/articles/9-376/v1/pdf","source":{"id":"https://openalex.org/S4210239046","display_name":"F1000Research","issn_l":"2046-1402","issn":["2046-1402"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320357","host_organization_name":"Faculty of 1000","host_organization_lineage":["https://openalex.org/P4310320357"],"host_organization_lineage_names":["Faculty of 1000"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"F1000Research","raw_type":"journal-article"},"type":"preprint","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://f1000research.com/articles/9-376/v1/pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016092157","display_name":"Matthew N. Bernstein","orcid":"https://orcid.org/0000-0002-1810-5252"},"institutions":[{"id":"https://openalex.org/I29680605","display_name":"Morgridge Institute for Research","ror":"https://ror.org/05cb4rb43","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I135310074","https://openalex.org/I29680605"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Matthew N. Bernstein","raw_affiliation_strings":["Morgridge Institute for Research, Madison, Wisconsin, 53715, USA"],"affiliations":[{"raw_affiliation_string":"Morgridge Institute for Research, Madison, Wisconsin, 53715, USA","institution_ids":["https://openalex.org/I29680605"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003093769","display_name":"Ariella Gladstein","orcid":"https://orcid.org/0000-0001-7735-2336"},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ariella Gladstein","raw_affiliation_strings":["Department of Genetics, University of North Carolina at Chapel Hill, Chapel Hill, North Carolina, 27599, USA"],"affiliations":[{"raw_affiliation_string":"Department of Genetics, University of North Carolina at Chapel Hill, Chapel Hill, North Carolina, 27599, USA","institution_ids":["https://openalex.org/I114027177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009696005","display_name":"Khun Zaw Latt","orcid":"https://orcid.org/0000-0001-9754-8723"},"institutions":[{"id":"https://openalex.org/I4210090567","display_name":"National Institute of Diabetes and Digestive and Kidney Diseases","ror":"https://ror.org/00adh9b73","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I4210090567"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Khun Zaw Latt","raw_affiliation_strings":["Kidney Disease Branch, National Institute of Diabetes and Digestive and Kidney Diseases, Bethesda, Maryland, 20892, USA"],"affiliations":[{"raw_affiliation_string":"Kidney Disease Branch, National Institute of Diabetes and Digestive and Kidney Diseases, Bethesda, Maryland, 20892, USA","institution_ids":["https://openalex.org/I4210090567"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087957891","display_name":"Emily Clough","orcid":"https://orcid.org/0009-0003-7602-1369"},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Emily Clough","raw_affiliation_strings":["National Center for Biotechnology Information NLM, Bethesda, Maryland, 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information NLM, Bethesda, Maryland, 20894, USA","institution_ids":["https://openalex.org/I4210109390"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070603357","display_name":"Ben Busby","orcid":"https://orcid.org/0000-0001-5267-4988"},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ben Busby","raw_affiliation_strings":["National Center for Biotechnology Information NLM, Bethesda, Maryland, 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information NLM, Bethesda, Maryland, 20894, USA","institution_ids":["https://openalex.org/I4210109390"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068671730","display_name":"Allissa Dillman","orcid":"https://orcid.org/0000-0003-0757-048X"},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Allissa Dillman","raw_affiliation_strings":["National Center for Biotechnology Information NLM, Bethesda, Maryland, 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information NLM, Bethesda, Maryland, 20894, USA","institution_ids":["https://openalex.org/I4210109390"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5016092157"],"corresponding_institution_ids":["https://openalex.org/I29680605"],"apc_list":{"value":1350,"currency":"USD","value_usd":1350},"apc_paid":{"value":1350,"currency":"USD","value_usd":1350},"fwci":0.4187,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.61423062,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"9","issue":null,"first_page":"376","last_page":"376"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7401613593101501},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47705522179603577},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.4730129539966583},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.4554142951965332},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.3449613153934479},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.3253400921821594},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2358030080795288},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.0783071219921112}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7401613593101501},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47705522179603577},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.4730129539966583},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.4554142951965332},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3449613153934479},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.3253400921821594},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2358030080795288},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0783071219921112},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.12688/f1000research.23180.1","is_oa":true,"landing_page_url":"https://doi.org/10.12688/f1000research.23180.1","pdf_url":"https://f1000research.com/articles/9-376/v1/pdf","source":{"id":"https://openalex.org/S4210239046","display_name":"F1000Research","issn_l":"2046-1402","issn":["2046-1402"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320357","host_organization_name":"Faculty of 1000","host_organization_lineage":["https://openalex.org/P4310320357"],"host_organization_lineage_names":["Faculty of 1000"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"F1000Research","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.12688/f1000research.23180.1","is_oa":true,"landing_page_url":"https://doi.org/10.12688/f1000research.23180.1","pdf_url":"https://f1000research.com/articles/9-376/v1/pdf","source":{"id":"https://openalex.org/S4210239046","display_name":"F1000Research","issn_l":"2046-1402","issn":["2046-1402"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320357","host_organization_name":"Faculty of 1000","host_organization_lineage":["https://openalex.org/P4310320357"],"host_organization_lineage_names":["Faculty of 1000"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"F1000Research","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/1","display_name":"No poverty"}],"awards":[{"id":"https://openalex.org/G8649589624","display_name":null,"funder_award_id":"2018-182626","funder_id":"https://openalex.org/F4320315474","funder_display_name":"Chan Zuckerberg Initiative"}],"funders":[{"id":"https://openalex.org/F4320306192","display_name":"Silicon Valley Community Foundation","ror":"https://ror.org/001ader08"},{"id":"https://openalex.org/F4320315474","display_name":"Chan Zuckerberg Initiative","ror":"https://ror.org/02qenvm24"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337372","display_name":"U.S. National Library of Medicine","ror":"https://ror.org/0060t0j89"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4236345772.pdf","grobid_xml":"https://content.openalex.org/works/W4236345772.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W1757909678","https://openalex.org/W2011301426","https://openalex.org/W2116034137","https://openalex.org/W2116041602","https://openalex.org/W2147018385","https://openalex.org/W2181616869","https://openalex.org/W2805978167","https://openalex.org/W2899915536","https://openalex.org/W2949612967","https://openalex.org/W2950750198","https://openalex.org/W3100689701"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2082860237","https://openalex.org/W2119695867","https://openalex.org/W2130076355","https://openalex.org/W1990804418","https://openalex.org/W1993764875","https://openalex.org/W2046158694","https://openalex.org/W2788277189","https://openalex.org/W2013243191","https://openalex.org/W1971568933"],"abstract_inverted_index":{"<ns4:p>":[0],"The":[1,99,131],"Sequence":[2],"Read":[3],"Archive":[4],"(SRA)":[5],"is":[6],"a":[7,70,114,155,167],"large":[8],"public":[9],"repository":[10],"that":[11,44,76],"stores":[12],"raw":[13],"next-generation":[14],"sequencing":[15],"data":[16,31],"from":[17,62,84],"thousands":[18],"of":[19,29,41,72,93,142,147,166,178],"diverse":[20],"scientific":[21],"investigations.":[22],"Despite":[23],"its":[24,46],"promise,":[25],"reuse":[26],"and":[27,38,110,122],"re-analysis":[28],"SRA":[30,86],"has":[32],"been":[33],"challenged":[34],"by":[35,56,126],"the":[36,42,50,78,85,94,103,120,135,145,164,176],"heterogeneity":[37],"poor":[39],"quality":[40],"metadata":[43,55],"describe":[45],"biological":[47,149],"samples.":[48],"Recently,":[49],"MetaSRA":[51,79],"project":[52],"standardized":[53],"these":[54],"annotating":[57],"each":[58],"sample":[59],"with":[60],"terms":[61],"biomedical":[63],"ontologies.":[64],"In":[65],"this":[66],"work,":[67],"we":[68],"present":[69],"pair":[71],"Jupyter":[73],"notebook-based":[74],"tools":[75,162],"utilize":[77],"for":[80,113,144],"building":[81],"structured":[82],"datasets":[83],"in":[87,171],"order":[88],"to":[89,152],"facilitate":[90],"secondary":[91],"analyses":[92],"SRA\u2019s":[95],"human":[96],"RNA-seq":[97],"data.":[98],"first":[100],"tool,":[101,133],"called":[102,134],"<ns4:italic>Case-Control":[104],"Finder</ns4:italic>":[105,137],",":[106,138],"finds":[107,139],"suitable":[108],"case":[109],"control":[111],"samples":[112,143],"given":[115],"disease":[116],"or":[117,128],"condition":[118],"where":[119],"cases":[121],"controls":[123],"are":[124],"matched":[125],"tissue":[127],"cell":[129],"type.":[130],"second":[132],"<ns4:italic>Series":[136],"ordered":[140],"sets":[141],"purpose":[146],"addressing":[148],"questions":[150],"pertaining":[151],"changes":[153],"over":[154],"numerical":[156],"property":[157],"such":[158],"as":[159],"time.":[160],"These":[161],"were":[163],"result":[165],"three-day-long":[168],"NCBI":[169],"Codeathon":[170],"March":[172],"2019":[173],"held":[174],"at":[175,181],"University":[177],"North":[179],"Carolina":[180],"Chapel":[182],"Hill.":[183],"</ns4:p>":[184]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-01T06:05:34.837733","created_date":"2025-10-10T00:00:00"}
