{"id":"https://openalex.org/W2020743485","doi":"https://doi.org/10.1145/2808719.2808739","title":"Rapid, separable compression enables fast analyses of sequence alignments","display_name":"Rapid, separable compression enables fast analyses of sequence alignments","publication_year":2015,"publication_date":"2015-09-09","ids":{"openalex":"https://openalex.org/W2020743485","doi":"https://doi.org/10.1145/2808719.2808739","mag":"2020743485"},"language":"en","primary_location":{"id":"doi:10.1145/2808719.2808739","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2808719.2808739","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2808739&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th ACM Conference on Bioinformatics, Computational Biology and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=2808739&type=pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070327849","display_name":"Darya Filippova","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]},{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Darya Filippova","raw_affiliation_strings":["Carngeie Mellon University, Pittsburgh, PA","Carngeie Mellon University, Pittsburgh, PA#TAB#"],"affiliations":[{"raw_affiliation_string":"Carngeie Mellon University, Pittsburgh, PA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carngeie Mellon University, Pittsburgh, PA#TAB#","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113653378","display_name":"Carl Kingsford","orcid":null},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carl Kingsford","raw_affiliation_strings":["Carngeie Mellon University, Pittsburgh, PA","Carngeie Mellon University, Pittsburgh, PA#TAB#"],"affiliations":[{"raw_affiliation_string":"Carngeie Mellon University, Pittsburgh, PA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carngeie Mellon University, Pittsburgh, PA#TAB#","institution_ids":["https://openalex.org/I170201317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5070327849"],"corresponding_institution_ids":["https://openalex.org/I170201317","https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.3018,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.61792724,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"194","last_page":"201"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10602","display_name":"Glycosylation and Glycoproteins Research","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6856204271316528},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6577630043029785},{"id":"https://openalex.org/keywords/separable-space","display_name":"Separable space","score":0.6030765175819397},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5968974232673645},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5168848037719727},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33790940046310425},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17687129974365234},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.08989328145980835},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.055800557136535645}],"concepts":[{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6856204271316528},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6577630043029785},{"id":"https://openalex.org/C70710897","wikidata":"https://www.wikidata.org/wiki/Q680081","display_name":"Separable space","level":2,"score":0.6030765175819397},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5968974232673645},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5168848037719727},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33790940046310425},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17687129974365234},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.08989328145980835},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.055800557136535645},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2808719.2808739","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2808719.2808739","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2808739&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th ACM Conference on Bioinformatics, Computational Biology and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/2808719.2808739","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2808719.2808739","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2808739&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th ACM Conference on Bioinformatics, Computational Biology and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1080157019","display_name":null,"funder_award_id":"R21HG006913","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G1325684988","display_name":null,"funder_award_id":"CCF-1256087","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2933222473","display_name":"AF: Small: Multiscale Spectral Signatures for Local and Multi-objective Biological Network Alignment","funder_award_id":"1319998","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3068176952","display_name":null,"funder_award_id":"R01HG007104","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3279864261","display_name":null,"funder_award_id":"HG006913","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3355402586","display_name":null,"funder_award_id":"GBMF4554","funder_id":"https://openalex.org/F4320306202","funder_display_name":"Gordon and Betty Moore Foundation"},{"id":"https://openalex.org/G3641969089","display_name":null,"funder_award_id":"1256087","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4362417887","display_name":null,"funder_award_id":"R21HG006913, R01HG007104","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G6569375368","display_name":null,"funder_award_id":"HG007104","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G7958957864","display_name":null,"funder_award_id":"CCF-1256087, CCF-1319998","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8047711681","display_name":null,"funder_award_id":"to Carl Kingsford","funder_id":"https://openalex.org/F4320306151","funder_display_name":"Alfred P. Sloan Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306151","display_name":"Alfred P. Sloan Foundation","ror":"https://ror.org/052csg198"},{"id":"https://openalex.org/F4320306202","display_name":"Gordon and Betty Moore Foundation","ror":"https://ror.org/006wxqw41"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2020743485.pdf","grobid_xml":"https://content.openalex.org/works/W2020743485.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W109642580","https://openalex.org/W1999574084","https://openalex.org/W2021341670","https://openalex.org/W2042947822","https://openalex.org/W2050374641","https://openalex.org/W2051929999","https://openalex.org/W2092880969","https://openalex.org/W2100076391","https://openalex.org/W2101247207","https://openalex.org/W2108234281","https://openalex.org/W2111044311","https://openalex.org/W2141458291","https://openalex.org/W2143423077","https://openalex.org/W2155430004","https://openalex.org/W2159084616","https://openalex.org/W2159683766","https://openalex.org/W2159906372","https://openalex.org/W2165255717","https://openalex.org/W2166588423","https://openalex.org/W2166654698","https://openalex.org/W2168133698","https://openalex.org/W2169456326"],"related_works":["https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2009525028","https://openalex.org/W4321064619","https://openalex.org/W2053599029","https://openalex.org/W3011179836","https://openalex.org/W2962925412","https://openalex.org/W326456911","https://openalex.org/W2612632602","https://openalex.org/W2321805087"],"abstract_inverted_index":{"Continued":[0],"growth":[1],"of":[2,43,58,74,81,98,122,129,135,147,167],"generated":[3],"sequencing":[4,148,213],"data":[5,25,50,214],"demands":[6],"novel":[7,39],"scalable":[8],"approaches":[9],"to":[10,32,41,64,92,96,158,165,178,204,218],"its":[11,27],"storage":[12],"and":[13,104,119,183],"transmission.":[14],"It":[15],"is":[16,53,90,105,115],"also":[17],"crucial":[18],"that":[19,52,138,149],"analyses":[20],"can":[21],"be":[22],"run":[23],"on":[24],"in":[26],"compressed":[28],"form":[29],"without":[30],"having":[31],"fully":[33],"reconstruct":[34],"it.":[35],"We":[36],"propose":[37],"a":[38,47,56,133,141,173,220],"approach":[40],"compression":[42,182,190],"sequence":[44,77,185],"alignment":[45,68,94,207],"data,":[46],"well":[48],"established":[49],"format":[51],"used":[54],"for":[55,161],"variety":[57,134],"tasks":[59,137],"ranging":[60],"from":[61,156,209,215],"genome":[62],"assembly":[63],"variant":[65],"calling.":[66],"Such":[67],"files":[69,95],"may":[70],"exceed":[71],"the":[72,75,88,99,123,210],"size":[73,103],"original":[76,100],"by":[78,126],"an":[79,127],"order":[80],"magnitude,":[82],"however,":[83],"Referee,":[84],"our":[85],"tool":[86],"implementing":[87],"approach,":[89],"able":[91],"compress":[93],"1/10":[97],"SAM":[101],"file":[102],"twice":[106],"as":[107,109,163,192,194],"efficient":[108],"SAM's":[110],"binary":[111],"BAM":[112],"variant.":[113],"Referee":[114,162,171],"fast,":[116],"highly":[117],"parallelizable,":[118],"outperforms":[120],"state":[121],"art":[124],"tools":[125],"average":[128],"8.1%":[130],"while":[131],"enabling":[132],"sequence-related":[136],"require":[139],"only":[140],"partial":[142],"decompression.":[143],"Computations":[144],"like":[145],"depth":[146],"involve":[150],"seeking":[151],"through":[152],"all":[153],"alignments":[154],"take":[155],"8":[157],"44":[159],"seconds":[160],"opposed":[164],"tens":[166],"minutes":[168],"with":[169,189],"samtools.":[170],"uses":[172],"lightweight":[174],"streaming":[175],"clustering":[176],"algorithm":[177],"improve":[179],"quality":[180],"values":[181],"encodes":[184],"information":[186,208],"very":[187],"efficiently,":[188],"rates":[191],"low":[193],"0.06":[195],"bits":[196],"per":[197],"base.":[198],"Its":[199],"modular":[200],"structure":[201],"allows":[202],"one":[203],"omit":[205],"extraneous":[206],"download":[211],"reducing":[212],"many":[216],"gigabytes":[217],"under":[219],"hundred":[221],"megabytes.":[222]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
