{"id":"https://openalex.org/W282286282","doi":"https://doi.org/10.7287/peerj.preprints.890v1","title":"Crossing the streams: a framework for streaming analysis of short DNA sequencing reads","display_name":"Crossing the streams: a framework for streaming analysis of short DNA sequencing reads","publication_year":2015,"publication_date":"2015-03-12","ids":{"openalex":"https://openalex.org/W282286282","doi":"https://doi.org/10.7287/peerj.preprints.890v1","mag":"282286282"},"language":"en","primary_location":{"id":"doi:10.7287/peerj.preprints.890v1","is_oa":true,"landing_page_url":"https://doi.org/10.7287/peerj.preprints.890v1","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"posted-content"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.7287/peerj.preprints.890v1","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052391290","display_name":"Qingpeng Zhang","orcid":"https://orcid.org/0000-0002-6819-0686"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Qingpeng Zhang","raw_affiliation_strings":["Computer Science and Engineering, Michigan State University, East Lansing, Michigan, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, Michigan State University, East Lansing, Michigan, USA","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036405743","display_name":"Sherine Awad","orcid":null},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]},{"id":"https://openalex.org/I84218800","display_name":"University of California, Davis","ror":"https://ror.org/05rrcem69","country_code":"US","type":"education","lineage":["https://openalex.org/I84218800"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sherine Awad","raw_affiliation_strings":["Microbiology and Molecular Genetics, Michigan State University, East Lansing, Michigan, USA","Population Health and Reproduction, University of California, Davis, Davis, California, USA"],"affiliations":[{"raw_affiliation_string":"Microbiology and Molecular Genetics, Michigan State University, East Lansing, Michigan, USA","institution_ids":["https://openalex.org/I87216513"]},{"raw_affiliation_string":"Population Health and Reproduction, University of California, Davis, Davis, California, USA","institution_ids":["https://openalex.org/I84218800"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036229691","display_name":"C. Titus Brown","orcid":"https://orcid.org/0000-0001-6001-2677"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]},{"id":"https://openalex.org/I84218800","display_name":"University of California, Davis","ror":"https://ror.org/05rrcem69","country_code":"US","type":"education","lineage":["https://openalex.org/I84218800"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Titus Brown","raw_affiliation_strings":["Computer Science and Engineering, Michigan State University, East Lansing, Michigan, USA","Microbiology and Molecular Genetics, Michigan State University, East Lansing, Michigan, USA","Population Health and Reproduction, University of California, Davis, Davis, California, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, Michigan State University, East Lansing, Michigan, USA","institution_ids":["https://openalex.org/I87216513"]},{"raw_affiliation_string":"Microbiology and Molecular Genetics, Michigan State University, East Lansing, Michigan, USA","institution_ids":["https://openalex.org/I87216513"]},{"raw_affiliation_string":"Population Health and Reproduction, University of California, Davis, Davis, California, USA","institution_ids":["https://openalex.org/I84218800"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5052391290"],"corresponding_institution_ids":["https://openalex.org/I87216513"],"apc_list":null,"apc_paid":null,"fwci":1.3128,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.79038867,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9850000143051147,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7224828004837036},{"id":"https://openalex.org/keywords/streaming-algorithm","display_name":"Streaming algorithm","score":0.7223109006881714},{"id":"https://openalex.org/keywords/trimming","display_name":"Trimming","score":0.5544092059135437},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.501488208770752},{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.4838391840457916},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.4551706910133362},{"id":"https://openalex.org/keywords/mit-license","display_name":"MIT License","score":0.4533204734325409},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.394910603761673},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33943450450897217},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.15633493661880493},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.14513558149337769},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11698022484779358},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11192208528518677},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.09294715523719788}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7224828004837036},{"id":"https://openalex.org/C187166803","wikidata":"https://www.wikidata.org/wiki/Q2835831","display_name":"Streaming algorithm","level":3,"score":0.7223109006881714},{"id":"https://openalex.org/C56951928","wikidata":"https://www.wikidata.org/wiki/Q3539213","display_name":"Trimming","level":2,"score":0.5544092059135437},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.501488208770752},{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.4838391840457916},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.4551706910133362},{"id":"https://openalex.org/C174183944","wikidata":"https://www.wikidata.org/wiki/Q334661","display_name":"MIT License","level":3,"score":0.4533204734325409},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.394910603761673},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33943450450897217},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.15633493661880493},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.14513558149337769},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11698022484779358},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11192208528518677},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.09294715523719788},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.7287/peerj.preprints.890v1","is_oa":true,"landing_page_url":"https://doi.org/10.7287/peerj.preprints.890v1","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"posted-content"}],"best_oa_location":{"id":"doi:10.7287/peerj.preprints.890v1","is_oa":true,"landing_page_url":"https://doi.org/10.7287/peerj.preprints.890v1","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"posted-content"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1493892051","https://openalex.org/W1606920356","https://openalex.org/W1905818355","https://openalex.org/W1912338927","https://openalex.org/W2001079689","https://openalex.org/W2009735916","https://openalex.org/W2011657487","https://openalex.org/W2096128575","https://openalex.org/W2100670078","https://openalex.org/W2113287691","https://openalex.org/W2115546424","https://openalex.org/W2126419817","https://openalex.org/W2132341951","https://openalex.org/W2133956160","https://openalex.org/W2144982963","https://openalex.org/W2160834953","https://openalex.org/W2163830511","https://openalex.org/W2165050065","https://openalex.org/W2165753192","https://openalex.org/W2170551349","https://openalex.org/W2290761674"],"related_works":["https://openalex.org/W2467235537","https://openalex.org/W1493074871","https://openalex.org/W2222099502","https://openalex.org/W1979067309","https://openalex.org/W2375590729","https://openalex.org/W2385024427","https://openalex.org/W2978797270","https://openalex.org/W1972676838","https://openalex.org/W2155082390","https://openalex.org/W1969665739"],"abstract_inverted_index":{"We":[0,35],"present":[1],"a":[2,15,46,54,66],"semi-streaming":[3,49],"algorithm":[4],"for":[5,39,48,56],"k-mer":[6],"spectral":[7],"analysis":[8,41,58],"of":[9,59],"DNA":[10],"sequencing":[11],"reads,":[12],"together":[13],"with":[14],"derivative":[16],"approach":[17,23],"that":[18],"is":[19,80],"fully":[20],"streaming.":[21],"The":[22],"can":[24],"also":[25],"be":[26],"applied":[27],"to":[28],"genomic,":[29],"transcriptomic,":[30],"and":[31,53],"metagenomic":[32],"data":[33],"sets.":[34],"develop":[36],"two":[37],"tools":[38,71],"short-read":[40],"based":[42],"on":[43],"these":[44],"approaches,":[45],"method":[47,55],"k-mer-based":[50],"error":[51,60],"trimming,":[52],"the":[57,75,84],"profiles":[61],"in":[62,74],"short":[63],"reads":[64],"using":[65],"streaming":[67],"sublinear":[68],"approach.":[69],"These":[70],"are":[72],"implemented":[73],"khmer":[76],"software":[77],"package,":[78],"which":[79],"freely":[81],"available":[82],"under":[83],"BSD":[85],"License":[86],"at":[87],"github.com/ged-lab/khmer/":[88],".":[89]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-25T14:43:58.451035","created_date":"2025-10-10T00:00:00"}
