{"id":"https://openalex.org/W2105583187","doi":"https://doi.org/10.1186/1471-2105-11-514","title":"Data structures and compression algorithms for high-throughput sequencing technologies","display_name":"Data structures and compression algorithms for high-throughput sequencing technologies","publication_year":2010,"publication_date":"2010-10-14","ids":{"openalex":"https://openalex.org/W2105583187","doi":"https://doi.org/10.1186/1471-2105-11-514","mag":"2105583187","pmid":"https://pubmed.ncbi.nlm.nih.gov/20946637"},"language":"en","primary_location":{"id":"doi:10.1186/1471-2105-11-514","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-11-514","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-11-514","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-11-514","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kenny Daily","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kenny Daily","raw_affiliation_strings":["Department of Computer Science, University of California Irvine, Irvine, CA 92697 USA","Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of California Irvine, Irvine, CA 92697 USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078253034","display_name":"Paul Rigor","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Paul Rigor","raw_affiliation_strings":["Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","Institute for Genomics and Bioinformatics, University of California Irvine, Irvine, CA, 92697, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Institute for Genomics and Bioinformatics, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078078996","display_name":"Scott Christley","orcid":"https://orcid.org/0000-0002-9889-1221"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Christley","raw_affiliation_strings":["Center for Complex Biological Systems, University of California Irvine, Irvine, CA, 92697, USA","Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","Department of Mathematics, University of California Irvine, Irvine, CA, 92697, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Complex Biological Systems, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Department of Mathematics, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084618257","display_name":"Xiaohui Xie","orcid":"https://orcid.org/0000-0002-5479-6345"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaohui Xie","raw_affiliation_strings":["Center for Complex Biological Systems, University of California Irvine, Irvine, CA, 92697, USA","Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","Institute for Genomics and Bioinformatics, University of California Irvine, Irvine, CA, 92697, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Complex Biological Systems, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Institute for Genomics and Bioinformatics, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088813478","display_name":"Pierre Baldi","orcid":"https://orcid.org/0000-0001-8752-4664"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Pierre Baldi","raw_affiliation_strings":["Center for Complex Biological Systems, University of California Irvine, Irvine, CA, 92697, USA","Department of Biological Chemistry, University of California Irvine, Irvine, CA, 92697, USA","Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","Institute for Genomics and Bioinformatics, University of California Irvine, Irvine, CA, 92697, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Complex Biological Systems, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Department of Biological Chemistry, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Department of Computer Science, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"Institute for Genomics and Bioinformatics, University of California Irvine, Irvine, CA, 92697, USA","institution_ids":["https://openalex.org/I204250578"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5088813478"],"corresponding_institution_ids":["https://openalex.org/I204250578"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":8.8591,"has_fulltext":true,"cited_by_count":52,"citation_normalized_percentile":{"value":0.97755928,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"11","issue":"1","first_page":"514","last_page":"514"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9380000233650208,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9380000233650208,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12946","display_name":"Fractal and DNA sequence analysis","score":0.011300000362098217,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.008999999612569809,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6625719666481018},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.6290565133094788},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.5610318183898926},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.502448320388794},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.48121407628059387},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.41211697459220886},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.4112699627876282},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3582487106323242},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.23716232180595398},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.19707098603248596},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.14831247925758362},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.09729328751564026},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0837261974811554}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6625719666481018},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.6290565133094788},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.5610318183898926},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.502448320388794},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.48121407628059387},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.41211697459220886},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.4112699627876282},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3582487106323242},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.23716232180595398},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.19707098603248596},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.14831247925758362},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.09729328751564026},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0837261974811554},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D044962","descriptor_name":"Data Compression","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D044962","descriptor_name":"Data Compression","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D044962","descriptor_name":"Data Compression","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":7,"locations":[{"id":"doi:10.1186/1471-2105-11-514","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-11-514","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-11-514","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:20946637","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/20946637","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:927844ec9f7e47068f3663ce2bd68b82","is_oa":false,"landing_page_url":"https://doaj.org/article/927844ec9f7e47068f3663ce2bd68b82","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 11, Iss 1, p 514 (2010)","raw_type":"article"},{"id":"pmh:oai:escholarship.org/ark:/13030/qt6ts356bt","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/6ts356bt","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, vol 11","raw_type":"article"},{"id":"pmh:oai:escholarship.org/ark:/13030/qt87h1v24k","is_oa":false,"landing_page_url":"https://escholarship.org/uc/item/87h1v24k","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, vol 11, iss 1","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:2964686","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/2964686","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"},{"id":"pmh:qt87h1v24k","is_oa":false,"landing_page_url":"http://www.escholarship.org/uc/item/87h1v24k","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Daily, Kenny; Rigor, Paul; Christley, Scott; Xie, Xiaohui; &amp; Baldi, Pierre. (2010). Data structures and compression algorithms for high-throughput sequencing technologies. BMC Bioinformatics, 11(1), 514. doi: http://dx.doi.org/10.1186/1471-2105-11-514. Retrieved from: http://www.escholarship.org/uc/item/87h1v24k","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/1471-2105-11-514","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-11-514","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-11-514","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5199999809265137,"display_name":"Responsible consumption and production","id":"https://metadata.un.org/sdg/12"}],"awards":[{"id":"https://openalex.org/G249540480","display_name":null,"funder_award_id":"P30 CA062203","funder_id":"https://openalex.org/F4320337351","funder_display_name":"National Cancer Institute"},{"id":"https://openalex.org/G2890776206","display_name":"Large Scale Empirical Validation of the Aspect-Oriented Design Hypothesis","funder_award_id":"0725370","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2939380823","display_name":null,"funder_award_id":"CCF-0725370","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4503775571","display_name":null,"funder_award_id":"P50 GM76516","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G5923756965","display_name":null,"funder_award_id":"P50 GM76516","funder_id":"https://openalex.org/F4320337354","funder_display_name":"National Institute of General Medical Sciences"},{"id":"https://openalex.org/G6708216397","display_name":"Mining Structured Data with Applications in Chemistry and Biology","funder_award_id":"0513376","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6781161026","display_name":null,"funder_award_id":"NIH P50","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G77261453","display_name":null,"funder_award_id":"LM-07443-01","funder_id":"https://openalex.org/F4320337372","funder_display_name":"U.S. National Library of Medicine"},{"id":"https://openalex.org/G7817783119","display_name":null,"funder_award_id":"GM76516","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G8748278119","display_name":null,"funder_award_id":"P50 GM076516","funder_id":"https://openalex.org/F4320337354","funder_display_name":"National Institute of General Medical Sciences"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337351","display_name":"National Cancer Institute","ror":"https://ror.org/040gcmg81"},{"id":"https://openalex.org/F4320337354","display_name":"National Institute of General Medical Sciences","ror":"https://ror.org/04q48ey07"},{"id":"https://openalex.org/F4320337372","display_name":"U.S. National Library of Medicine","ror":"https://ror.org/0060t0j89"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2105583187.pdf","grobid_xml":"https://content.openalex.org/works/W2105583187.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W788498898","https://openalex.org/W1492896593","https://openalex.org/W1496422306","https://openalex.org/W1524920060","https://openalex.org/W1559631118","https://openalex.org/W1560200430","https://openalex.org/W1651818244","https://openalex.org/W1970231722","https://openalex.org/W1992371956","https://openalex.org/W2001725958","https://openalex.org/W2012016911","https://openalex.org/W2034424993","https://openalex.org/W2058422563","https://openalex.org/W2060108852","https://openalex.org/W2065579185","https://openalex.org/W2099111195","https://openalex.org/W2112113834","https://openalex.org/W2112509895","https://openalex.org/W2113649367","https://openalex.org/W2115613939","https://openalex.org/W2115824344","https://openalex.org/W2122554465","https://openalex.org/W2124985265","https://openalex.org/W2126829482","https://openalex.org/W2128777897","https://openalex.org/W2129652681","https://openalex.org/W2129795133","https://openalex.org/W2142642738","https://openalex.org/W2143124024","https://openalex.org/W2144514096","https://openalex.org/W2145058650","https://openalex.org/W2147492358","https://openalex.org/W2156389165","https://openalex.org/W2160484851","https://openalex.org/W2168909179","https://openalex.org/W2297165308","https://openalex.org/W2553682957","https://openalex.org/W2911940095","https://openalex.org/W4214573638","https://openalex.org/W4230511558","https://openalex.org/W4233459951","https://openalex.org/W4242673342"],"related_works":["https://openalex.org/W2136583354","https://openalex.org/W2111238207","https://openalex.org/W2760721665","https://openalex.org/W330130819","https://openalex.org/W2288610023","https://openalex.org/W2112044895","https://openalex.org/W3121416282","https://openalex.org/W1967080779","https://openalex.org/W2612632602","https://openalex.org/W2321805087"],"abstract_inverted_index":{"BACKGROUND:":[0],"High-throughput":[1],"sequencing":[2,17,32],"(HTS)":[3],"technologies":[4,46],"play":[5],"important":[6],"roles":[7],"in":[8,27,149,290,314],"the":[9,14,52,87,90,100,146,164,168,202,256,266,269,281],"life":[10],"sciences":[11],"by":[12,155],"allowing":[13],"rapid":[15],"parallel":[16],"of":[18,22,56,84,158,167,224,251,271],"very":[19],"large":[20,82],"numbers":[21],"relatively":[23],"short":[24,74,91],"nucleotide":[25],"sequences,":[26],"applications":[28],"ranging":[29],"from":[30,265],"genome":[31,79],"and":[33,38,54,64,99,107,117,126,136,171,175,190,274,299,309],"resequencing":[34],"to":[35,76,138,232,287,307],"digital":[36],"microarrays":[37],"ChIP-Seq":[39],"experiments.":[40],"As":[41],"experiments":[42],"scale":[43],"up,":[44],"HTS":[45,57,68,140,150,225,272,312],"create":[47],"new":[48,118,276],"bioinformatics":[49],"challenges":[50],"for":[51,67,221,279],"storage":[53],"sharing":[55],"data.":[58,69,226],"RESULTS:":[59],"We":[60,247],"develop":[61],"data":[62,141,169,235,313],"structures":[63],"compression":[65,184,206,302],"algorithms":[66,178,197,254],"A":[70],"processing":[71],"stage":[72],"maps":[73],"sequences":[75],"a":[77,81,156,315],"reference":[78],"or":[80,94,160],"table":[83],"sequences.":[85],"Then":[86],"integers":[88],"representing":[89],"sequence":[92,283],"absolute":[93],"relative":[95],"addresses,":[96],"their":[97,311],"length,":[98],"substitutions":[101],"they":[102],"may":[103],"contain":[104],"are":[105,198,230,285],"compressed":[106,154],"stored":[108],"using":[109,280],"various":[110,172,234],"entropy":[111],"coding":[112],"algorithms,":[113],"including":[114],"both":[115],"old":[116],"fixed":[119],"codes":[120,128],"(e.g":[121],"Golomb,":[122],"Elias":[123],"Gamma,":[124],"MOV)":[125],"variable":[127],"(e.g.":[129],"Huffman).":[130],"The":[131,292],"general":[132,182,204],"methodology":[133,293],"is":[134,210,261,297],"illustrated":[135],"applied":[137],"several":[139],"sets.":[142],"Results":[143],"show":[144,194],"that":[145,195,213],"information":[147],"contained":[148],"files":[151],"can":[152,241],"be":[153,219,242],"factor":[157],"10":[159],"more,":[161],"depending":[162],"on":[163],"statistical":[165],"properties":[166],"sets":[170],"other":[173],"choices":[174],"constraints.":[176],"Our":[177],"fair":[179],"well":[180],"against":[181],"purpose":[183,205],"programs":[185],"such":[186],"as":[187],"gzip,":[188],"bzip2":[189],"7zip;":[191],"timing":[192],"results":[193],"our":[196,252],"consistently":[199],"faster":[200],"than":[201,245],"best":[203],"programs.":[207],"CONCLUSIONS:":[208],"It":[209],"not":[211],"likely":[212],"exactly":[214],"one":[215,238],"encoding":[216,239,253],"strategy":[217,240],"will":[218],"optimal":[220],"all":[222],"types":[223],"Different":[227],"experimental":[228,277],"conditions":[229],"going":[231],"generate":[233],"distributions":[236],"whereby":[237],"more":[243,316],"effective":[244],"another.":[246],"have":[248,295],"implemented":[249],"some":[250],"into":[255],"software":[257],"package":[258],"GenCompress":[259],"which":[260],"available":[262],"upon":[263],"request":[264],"authors.":[267],"With":[268],"advent":[270],"technology":[273],"increasingly":[275],"protocols":[278],"technology,":[282],"databases":[284],"expected":[286],"continue":[288],"rising":[289],"size.":[291],"we":[294],"proposed":[296],"general,":[298],"these":[300],"advanced":[301],"techniques":[303],"should":[304],"allow":[305],"researchers":[306],"manage":[308],"share":[310],"timely":[317],"fashion.":[318]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":10},{"year":2012,"cited_by_count":4}],"updated_date":"2026-06-24T13:16:06.693445","created_date":"2025-10-10T00:00:00"}
