{"id":"https://openalex.org/W2202973882","doi":"https://doi.org/10.1109/bibm.2015.7359838","title":"CoCo: An application to store High-Throughput Sequencing data in compact text and binary file formats","display_name":"CoCo: An application to store High-Throughput Sequencing data in compact text and binary file formats","publication_year":2015,"publication_date":"2015-11-01","ids":{"openalex":"https://openalex.org/W2202973882","doi":"https://doi.org/10.1109/bibm.2015.7359838","mag":"2202973882"},"language":"en","primary_location":{"id":"doi:10.1109/bibm.2015.7359838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2015.7359838","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035061071","display_name":"Kamil Khanipov","orcid":"https://orcid.org/0000-0002-3881-737X"},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]},{"id":"https://openalex.org/I55302922","display_name":"The University of Texas Medical Branch at Galveston","ror":"https://ror.org/016tfm930","country_code":"US","type":"education","lineage":["https://openalex.org/I55302922"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kamil Khanipov","raw_affiliation_strings":["Department of Computer Science, University of Houston, Houston, TX","The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Houston, Houston, TX","institution_ids":["https://openalex.org/I44461941"]},{"raw_affiliation_string":"The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX","institution_ids":["https://openalex.org/I55302922"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001022644","display_name":"George Golovko","orcid":"https://orcid.org/0000-0003-4609-2767"},"institutions":[{"id":"https://openalex.org/I55302922","display_name":"The University of Texas Medical Branch at Galveston","ror":"https://ror.org/016tfm930","country_code":"US","type":"education","lineage":["https://openalex.org/I55302922"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Georgiy Golovko","raw_affiliation_strings":["The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX"],"affiliations":[{"raw_affiliation_string":"The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX","institution_ids":["https://openalex.org/I55302922"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111167062","display_name":"Mark Rojas","orcid":null},"institutions":[{"id":"https://openalex.org/I55302922","display_name":"The University of Texas Medical Branch at Galveston","ror":"https://ror.org/016tfm930","country_code":"US","type":"education","lineage":["https://openalex.org/I55302922"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark Rojas","raw_affiliation_strings":["The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX"],"affiliations":[{"raw_affiliation_string":"The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX","institution_ids":["https://openalex.org/I55302922"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037759586","display_name":"Levent Albayrak","orcid":"https://orcid.org/0000-0002-4288-8170"},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]},{"id":"https://openalex.org/I55302922","display_name":"The University of Texas Medical Branch at Galveston","ror":"https://ror.org/016tfm930","country_code":"US","type":"education","lineage":["https://openalex.org/I55302922"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Levent Albayrak","raw_affiliation_strings":["Department of Computer Science, University of Houston, Houston, TX","The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Houston, Houston, TX","institution_ids":["https://openalex.org/I44461941"]},{"raw_affiliation_string":"The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX","institution_ids":["https://openalex.org/I55302922"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058351564","display_name":"Otto Dobretsberger","orcid":null},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Otto Dobretsberger","raw_affiliation_strings":["Department of Computer Science, University of Houston, Houston, TX"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Houston, Houston, TX","institution_ids":["https://openalex.org/I44461941"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089518490","display_name":"Maria Pimenova","orcid":null},"institutions":[{"id":"https://openalex.org/I55302922","display_name":"The University of Texas Medical Branch at Galveston","ror":"https://ror.org/016tfm930","country_code":"US","type":"education","lineage":["https://openalex.org/I55302922"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maria Pimenova","raw_affiliation_strings":["The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX"],"affiliations":[{"raw_affiliation_string":"The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX","institution_ids":["https://openalex.org/I55302922"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015998985","display_name":"N E Olson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nels Olson","raw_affiliation_strings":["Nels A. Olson, Seattle, WA"],"affiliations":[{"raw_affiliation_string":"Nels A. Olson, Seattle, WA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108723492","display_name":"Sergei Chumakov","orcid":null},"institutions":[{"id":"https://openalex.org/I193181351","display_name":"Universidad de Guadalajara","ror":"https://ror.org/043xj7k26","country_code":"MX","type":"education","lineage":["https://openalex.org/I193181351"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Sergei Chumakov","raw_affiliation_strings":["Department of Physics, University of Guadalajara, Guadalajara, Jalisco, Mexico"],"affiliations":[{"raw_affiliation_string":"Department of Physics, University of Guadalajara, Guadalajara, Jalisco, Mexico","institution_ids":["https://openalex.org/I193181351"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027121994","display_name":"Yuriy Fofanov","orcid":"https://orcid.org/0000-0003-4240-4313"},"institutions":[{"id":"https://openalex.org/I55302922","display_name":"The University of Texas Medical Branch at Galveston","ror":"https://ror.org/016tfm930","country_code":"US","type":"education","lineage":["https://openalex.org/I55302922"]},{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuriy Fofanov","raw_affiliation_strings":["Department of Computer Science, University of Houston, Houston, TX","The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Houston, Houston, TX","institution_ids":["https://openalex.org/I44461941"]},{"raw_affiliation_string":"The Sealy Center for Structural Biology & Molecular Biophysics, University of Texas Medical Branch, Galveston, TX","institution_ids":["https://openalex.org/I55302922"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5035061071"],"corresponding_institution_ids":["https://openalex.org/I44461941","https://openalex.org/I55302922"],"apc_list":null,"apc_paid":null,"fwci":0.1459,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.56163436,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"331","issue":null,"first_page":"1117","last_page":"1122"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8293004035949707},{"id":"https://openalex.org/keywords/byte","display_name":"Byte","score":0.63917076587677},{"id":"https://openalex.org/keywords/file-size","display_name":"File size","score":0.5563197731971741},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5371482968330383},{"id":"https://openalex.org/keywords/file-format","display_name":"File format","score":0.5219489932060242},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.5218573808670044},{"id":"https://openalex.org/keywords/sorting","display_name":"Sorting","score":0.4703940153121948},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3423595726490021},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3311748802661896},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14920395612716675},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.11355039477348328}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8293004035949707},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.63917076587677},{"id":"https://openalex.org/C2776029614","wikidata":"https://www.wikidata.org/wiki/Q1146367","display_name":"File size","level":2,"score":0.5563197731971741},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5371482968330383},{"id":"https://openalex.org/C97250363","wikidata":"https://www.wikidata.org/wiki/Q235557","display_name":"File format","level":2,"score":0.5219489932060242},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.5218573808670044},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.4703940153121948},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3423595726490021},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3311748802661896},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14920395612716675},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.11355039477348328},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm.2015.7359838","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2015.7359838","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5199999809265137,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1929174210","https://openalex.org/W1971072534","https://openalex.org/W2020255088","https://openalex.org/W2038595016","https://openalex.org/W2057791956","https://openalex.org/W2071381739","https://openalex.org/W2073162546","https://openalex.org/W2111727837","https://openalex.org/W2117608012","https://openalex.org/W2123497973","https://openalex.org/W2129504003","https://openalex.org/W2138196010","https://openalex.org/W2159683766","https://openalex.org/W2341781451","https://openalex.org/W3098364857","https://openalex.org/W4210727445"],"related_works":["https://openalex.org/W2189154201","https://openalex.org/W3039556430","https://openalex.org/W2374324126","https://openalex.org/W2081391536","https://openalex.org/W2210852504","https://openalex.org/W2371615582","https://openalex.org/W4239412715","https://openalex.org/W1981950836","https://openalex.org/W2587020896","https://openalex.org/W2146547297"],"abstract_inverted_index":{"The":[0,29,147,233,249,273],"storage,":[1],"manipulation,":[2],"and":[3,44,46,52,97,144,176,186,246,271,314,322,330,343],"especially":[4],"internet":[5],"transfer":[6],"of":[7,10,25,82,112,121,128,157,161,173,178,188,191,208,214,225,236,275,305,338],"large":[8],"amounts":[9],"data":[11,68,96,104,114,340],"produced":[12,40],"by":[13,116,288],"High-Throughput":[14],"Sequencing":[15],"(HTS)":[16],"instruments":[17],"present":[18,125,167,301],"major":[19],"obstacles":[20],"utilizing":[21],"the":[22,76,80,180,189,196,209,212,215,226,237,260,278,289,294,297,302,306],"full":[23],"potential":[24],"this":[26],"promising":[27],"technology.":[28],"current":[30],"standard":[31],"is":[32,241,263,285],"based":[33,242],"on":[34,206,243],"storing":[35],"all":[36,174,336],"data,":[37,84,211],"which":[38],"are":[39,166],"in":[41,49,70,79,140,168,198,277],"text":[42,143],"(FASTQ":[43],"FASTA)":[45],"often":[47],"stored":[48],"binary":[50,145,199],"(SRA":[51],"BAM)":[53],"formats.":[54,73,200],"To":[55],"date,":[56],"significant":[57],"effort":[58],"has":[59],"been":[60],"devoted":[61],"to":[62,92,109,133,151,194,252,265,282,292,311,333],"efficiently":[63],"compressing":[64],"these":[65],"cumbersome":[66],"sequencing":[67,138,210,339],"sets":[69],"their":[71],"existing":[72],"However,":[74],"given":[75],"substantial":[77],"improvements":[78],"quality":[81,95,137],"HTS":[83,113],"we":[85,124],"believe":[86],"that":[87],"if":[88],"one":[89,159],"can":[90,106,218,356],"afford":[91],"exclude":[93],"low":[94,221],"read":[98,293],"headers,":[99],"new":[100],"much":[101],"more":[102],"compressed":[103],"formats":[105,130,239,262,280],"be":[107,219,357],"used":[108,150],"reduce":[110],"size":[111,154,203,213,345],"files":[115,217,258,276],"at":[117],"least":[118],"two":[119,323],"orders":[120],"magnitude.":[122],"Here":[123],"several":[126],"examples":[127],"file":[129,153,202,295],"specifically":[131],"designed":[132],"store":[134,195],"only":[135,158,179,287],"high":[136],"reads":[139,165,175,254],"space":[141],"efficient":[142],"form.":[146],"basic":[148],"principles":[149],"decrease":[152],"include":[155],"storage":[156,177],"copy":[160],"a":[162],"sequence":[163],"when":[164],"multiple":[169],"copies;":[170],"alphabetical":[171],"sorting":[172],"differences":[181],"(suffixes)":[182],"between":[183],"consecutive":[184],"reads;":[185],"optimization":[187],"number":[190],"bits/bytes":[192],"required":[193,251,291],"information":[197],"While":[201],"reduction":[204],"depends":[205],"properties":[207],"resulting":[216],"as":[220,222,351,353],"0.1":[223],"%-5%":[224],"original":[227],"FASTQ,":[228],"SRA,":[229],"or":[230],"BAM":[231],"files.":[232],"greatest":[234],"advantage":[235],"proposed":[238,261,279],"however,":[240],"its":[244],"time":[245,250,290],"memory":[247],"efficiency.":[248],"convert":[253],"from":[255,296,359],"FASTQ/FAST":[256],"A":[257,284],"into":[259],"up":[264],"10":[266],"times":[267],"faster":[268],"than":[269],"gzip":[270],"SRA.":[272],"conversion":[274],"back":[281],"FAST":[283],"limited":[286],"hard":[298],"drive.":[299],"We":[300],"source":[303],"code":[304],"C++":[307],"object":[308],"(class)":[309],"implemented":[310],"store,":[312],"sort,":[313],"perform":[315],"I/O":[316],"operations":[317],"with":[318,335],"equal":[319],"length":[320],"subsequences;":[321],"executable":[324],"LINUX":[325],"command":[326],"line":[327],"applications":[328],"(CoCo":[329],"CoCo-PIus)":[331],"able":[332],"work":[334],"types":[337],"including":[341],"paired-end":[342],"flexible":[344],"reads.":[346],"Source":[347],"code,":[348],"Linux":[349],"executables,":[350],"well":[352],"user":[354],"manual":[355],"downloaded":[358],"http://bgl.utmb.edu/publications/34cocoplus.":[360]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
