{"id":"https://openalex.org/W4381851895","doi":"https://doi.org/10.1093/bioinformatics/btad399","title":"S-leaping: an efficient downsampling method for large high-throughput sequencing data","display_name":"S-leaping: an efficient downsampling method for large high-throughput sequencing data","publication_year":2023,"publication_date":"2023-06-24","ids":{"openalex":"https://openalex.org/W4381851895","doi":"https://doi.org/10.1093/bioinformatics/btad399","pmid":"https://pubmed.ncbi.nlm.nih.gov/37354496"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/btad399","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btad399","pdf_url":"https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btad399/50696354/btad399.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btad399/50696354/btad399.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005997934","display_name":"Hiroyuki Kuwahara","orcid":"https://orcid.org/0000-0001-5333-6729"},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Hiroyuki Kuwahara","raw_affiliation_strings":["Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), Computational Bioscience Research Center (CBRC), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Saudi Arabia","King Abdullah University of Science and Technology (KAUST), Computational Bioscience Research Center (CBRC), Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), Thuwal, 23955-6900, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0001-5333-6729","affiliations":[{"raw_affiliation_string":"Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), Computational Bioscience Research Center (CBRC), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]},{"raw_affiliation_string":"King Abdullah University of Science and Technology (KAUST), Computational Bioscience Research Center (CBRC), Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), Thuwal, 23955-6900, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100618900","display_name":"Xin Gao","orcid":"https://orcid.org/0000-0002-7108-3574"},"institutions":[{"id":"https://openalex.org/I71920554","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38","country_code":"SA","type":"education","lineage":["https://openalex.org/I71920554"]}],"countries":["SA"],"is_corresponding":true,"raw_author_name":"Xin Gao","raw_affiliation_strings":["Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), Computational Bioscience Research Center (CBRC), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Saudi Arabia","King Abdullah University of Science and Technology (KAUST), Computational Bioscience Research Center (CBRC), Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), Thuwal, 23955-6900, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0002-7108-3574","affiliations":[{"raw_affiliation_string":"Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), Computational Bioscience Research Center (CBRC), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]},{"raw_affiliation_string":"King Abdullah University of Science and Technology (KAUST), Computational Bioscience Research Center (CBRC), Computer, Electrical and Mathematical Sciences and Engineering Division (CEMSE), Thuwal, 23955-6900, Saudi Arabia","institution_ids":["https://openalex.org/I71920554"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100618900"],"corresponding_institution_ids":["https://openalex.org/I71920554"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":{"value":3618,"currency":"USD","value_usd":3618},"fwci":0.5702,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.75038849,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"39","issue":"7","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.3614000082015991,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.3614000082015991,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.3107999861240387,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11287","display_name":"Cancer Genomics and Diagnostics","score":0.13500000536441803,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/upsampling","display_name":"Upsampling","score":0.9672431945800781},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8075319528579712},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.6162880659103394},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5329322218894958},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.49695757031440735},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.47198325395584106},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4487520456314087},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.41509148478507996},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24736705422401428},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07507851719856262}],"concepts":[{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.9672431945800781},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8075319528579712},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.6162880659103394},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5329322218894958},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.49695757031440735},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.47198325395584106},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4487520456314087},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.41509148478507996},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24736705422401428},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07507851719856262},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000073336","descriptor_name":"Whole Genome Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000073336","descriptor_name":"Whole Genome Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000073336","descriptor_name":"Whole Genome Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1093/bioinformatics/btad399","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btad399","pdf_url":"https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btad399/50696354/btad399.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:37354496","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37354496","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:10318387","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/10318387","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC10318387/pdf/btad399.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Bioinformatics","raw_type":"Text"},{"id":"pmh:oai:repository.kaust.edu.sa:10754/692923","is_oa":true,"landing_page_url":"http://hdl.handle.net/10754/692923","pdf_url":null,"source":{"id":"https://openalex.org/S4306401596","display_name":"King Abdullah University of Science and Technology Repository (King Abdullah University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I71920554","host_organization_name":"King Abdullah University of Science and Technology","host_organization_lineage":["https://openalex.org/I71920554"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":{"id":"doi:10.1093/bioinformatics/btad399","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btad399","pdf_url":"https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btad399/50696354/btad399.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4099999964237213}],"awards":[{"id":"https://openalex.org/G1825332442","display_name":null,"funder_award_id":"REI/1/5202-01-01","funder_id":"https://openalex.org/F4320322320","funder_display_name":"King Abdullah University of Science and Technology"},{"id":"https://openalex.org/G2058729345","display_name":null,"funder_award_id":"FCC/1/1976-45-01","funder_id":"https://openalex.org/F4320322320","funder_display_name":"King Abdullah University of Science and Technology"},{"id":"https://openalex.org/G3518144781","display_name":null,"funder_award_id":"URF/1/4663-01-01","funder_id":"https://openalex.org/F4320322320","funder_display_name":"King Abdullah University of Science and Technology"},{"id":"https://openalex.org/G7727508569","display_name":null,"funder_award_id":"FCC/1/1976-44-01, FCC/1/1976-45-01","funder_id":"https://openalex.org/F4320322320","funder_display_name":"King Abdullah University of Science and Technology"},{"id":"https://openalex.org/G7796938250","display_name":null,"funder_award_id":"FCC/1/1976-44-01, FCC/1/1976-45-01, URF/1/4663-01-01","funder_id":"https://openalex.org/F4320322320","funder_display_name":"King Abdullah University of Science and Technology"},{"id":"https://openalex.org/G8253667702","display_name":null,"funder_award_id":"FCC/1/1976-44-01","funder_id":"https://openalex.org/F4320322320","funder_display_name":"King Abdullah University of Science and Technology"},{"id":"https://openalex.org/G8644074954","display_name":null,"funder_award_id":"RGC/3/4816-01-01","funder_id":"https://openalex.org/F4320322320","funder_display_name":"King Abdullah University of Science and Technology"},{"id":"https://openalex.org/G8848196204","display_name":null,"funder_award_id":"REI/1/4940-01-01","funder_id":"https://openalex.org/F4320322320","funder_display_name":"King Abdullah University of Science and Technology"}],"funders":[{"id":"https://openalex.org/F4320322320","display_name":"King Abdullah University of Science and Technology","ror":"https://ror.org/01q3tbs38"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4381851895.pdf"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W2027002571","https://openalex.org/W2042321087","https://openalex.org/W2084924279","https://openalex.org/W2108234281","https://openalex.org/W2119180969","https://openalex.org/W2119885577","https://openalex.org/W2289130930","https://openalex.org/W2474836229","https://openalex.org/W2949838375","https://openalex.org/W2951529834","https://openalex.org/W2991501877","https://openalex.org/W3021014091","https://openalex.org/W3119770574","https://openalex.org/W3120295425","https://openalex.org/W3126861453","https://openalex.org/W3153630498","https://openalex.org/W4206754124","https://openalex.org/W6676286517","https://openalex.org/W6742114775"],"related_works":["https://openalex.org/W160116885","https://openalex.org/W2059929079","https://openalex.org/W2625512991","https://openalex.org/W2526884355","https://openalex.org/W2011083790","https://openalex.org/W2950363298","https://openalex.org/W2477853911","https://openalex.org/W1967383368","https://openalex.org/W2322403445","https://openalex.org/W2017976553"],"abstract_inverted_index":{"MOTIVATION:":[0],"Sequencing":[1],"coverage":[2,19],"is":[3,36,195],"among":[4],"key":[5],"determinants":[6],"considered":[7],"in":[8,83,125,175],"the":[9,41,75,101,108,148,192],"design":[10],"of":[11,30,43,77,86,141],"omics":[12],"studies.":[13],"To":[14,112],"help":[15],"estimate":[16],"cost-effective":[17],"sequencing":[18,44,129],"for":[20,187],"specific":[21,34],"downstream":[22],"analysis,":[23],"downsampling,":[24],"a":[25,33,84,120,142,176],"technique":[26],"to":[27,66,94,97,107,155,158,168,171],"sample":[28],"subsets":[29],"reads":[31],"with":[32,79,104,131,139,147,161],"size,":[35],"routinely":[37],"used.":[38],"However,":[39],"as":[40,189,191],"size":[42],"becomes":[45,50],"larger":[46],"and":[47,68,91,152],"larger,":[48],"downsampling":[49,59,81,89,110,150],"computationally":[51],"challenging.":[52],"RESULTS:":[53],"Here,":[54],"we":[55,118,135],"developed":[56,119],"an":[57],"approximate":[58],"method":[60],"called":[61,123],"s-leaping":[62,78,93,114],"that":[63,140],"was":[64],"designed":[65],"efficiently":[67],"accurately":[69],"process":[70],"large-size":[71],"data.":[72],"We":[73],"compared":[74,136],"performance":[76,138],"state-of-the-art":[80],"methods":[82],"range":[85],"practical":[87],"omics-study":[88],"settings":[90],"found":[92,153],"be":[95,156],"up":[96,157,170],"39%":[98],"faster":[99,160],"than":[100],"second-fastest":[102],"method,":[103],"comparable":[105],"accuracy":[106],"exact":[109],"methods.":[111],"apply":[113],"on":[115],"FASTQ":[116,145],"data,":[117],"light-weight":[121],"tool":[122,146],"fadso":[124,154,167,193],"C.":[126],"Using":[127],"whole-genome":[128],"data":[130],"208":[132],"million":[133],"reads,":[134],"fadso's":[137],"commonly":[143],"used":[144],"same":[149],"feature":[151],"12%":[159],"21%":[162],"lower":[163],"memory":[164],"usage,":[165],"suggesting":[166],"have":[169],"40%":[172],"higher":[173],"throughput":[174],"parallel":[177],"computing":[178],"setting.":[179],"AVAILABILITY":[180],"AND":[181],"IMPLEMENTATION:":[182],"The":[183],"C":[184],"source":[185],"code":[186],"s-leaping,":[188],"well":[190],"package":[194],"freely":[196],"available":[197],"at":[198],"https://github.com/hkuwahara/sleaping.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-16T07:32:37.131356","created_date":"2025-10-10T00:00:00"}
