{"id":"https://openalex.org/W3024934024","doi":"https://doi.org/10.1109/tc.2020.2994774","title":"GenoDedup: Similarity-Based Deduplication and Delta-Encoding for Genome Sequencing Data","display_name":"GenoDedup: Similarity-Based Deduplication and Delta-Encoding for Genome Sequencing Data","publication_year":2020,"publication_date":"2020-05-16","ids":{"openalex":"https://openalex.org/W3024934024","doi":"https://doi.org/10.1109/tc.2020.2994774","mag":"3024934024"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2020.2994774","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2020.2994774","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://repositorio.inesctec.pt/handle/123456789/11369","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000397453","display_name":"Vinicius V. Cogo","orcid":"https://orcid.org/0000-0002-1299-8950"},"institutions":[{"id":"https://openalex.org/I141596103","display_name":"University of Lisbon","ror":"https://ror.org/01c27hj86","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"Vinicius Cogo","raw_affiliation_strings":["LASIGE, Faculdade de Ci\u00eancias, Universidade de Lisboa, Lisboa, Portugal"],"affiliations":[{"raw_affiliation_string":"LASIGE, Faculdade de Ci\u00eancias, Universidade de Lisboa, Lisboa, Portugal","institution_ids":["https://openalex.org/I141596103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054409160","display_name":"Jo\u00e3o Paulo","orcid":"https://orcid.org/0000-0001-9752-2822"},"institutions":[{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]},{"id":"https://openalex.org/I99682543","display_name":"University of Minho","ror":"https://ror.org/037wpkx04","country_code":"PT","type":"education","lineage":["https://openalex.org/I99682543"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Joao Paulo","raw_affiliation_strings":["HASLab\u2014High-Assurance Software Lab, INESC TEC, Universidade do Minho, Braga, Portugal"],"affiliations":[{"raw_affiliation_string":"HASLab\u2014High-Assurance Software Lab, INESC TEC, Universidade do Minho, Braga, Portugal","institution_ids":["https://openalex.org/I99682543","https://openalex.org/I4210166615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003076174","display_name":"Alysson Bessani","orcid":"https://orcid.org/0000-0002-8386-1628"},"institutions":[{"id":"https://openalex.org/I141596103","display_name":"University of Lisbon","ror":"https://ror.org/01c27hj86","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Alysson Bessani","raw_affiliation_strings":["LASIGE, Faculdade de Ci\u00eancias, Universidade de Lisboa, Lisboa, Portugal"],"affiliations":[{"raw_affiliation_string":"LASIGE, Faculdade de Ci\u00eancias, Universidade de Lisboa, Lisboa, Portugal","institution_ids":["https://openalex.org/I141596103"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5000397453"],"corresponding_institution_ids":["https://openalex.org/I141596103"],"apc_list":null,"apc_paid":null,"fwci":1.7716,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.85816025,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"70","issue":"5","first_page":"669","last_page":"681"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.9592241644859314},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7709959745407104},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5898550152778625},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5104164481163025},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.4780367314815521},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.47717443108558655},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.47419124841690063},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.4347715675830841},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.42254164814949036},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3446803689002991},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.17450463771820068},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1298949420452118},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.12161523103713989}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.9592241644859314},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7709959745407104},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5898550152778625},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5104164481163025},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4780367314815521},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.47717443108558655},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.47419124841690063},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.4347715675830841},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.42254164814949036},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3446803689002991},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.17450463771820068},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1298949420452118},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.12161523103713989},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tc.2020.2994774","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2020.2994774","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},{"id":"pmh:oai:repositorio.inesctec.pt:123456789/11369","is_oa":true,"landing_page_url":"http://repositorio.inesctec.pt/handle/123456789/11369","pdf_url":null,"source":{"id":"https://openalex.org/S4306402433","display_name":"Portuguese National Funding Agency for Science, Research and Technology (RCAAP Project by FCT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"other"}],"best_oa_location":{"id":"pmh:oai:repositorio.inesctec.pt:123456789/11369","is_oa":true,"landing_page_url":"http://repositorio.inesctec.pt/handle/123456789/11369","pdf_url":null,"source":{"id":"https://openalex.org/S4306402433","display_name":"Portuguese National Funding Agency for Science, Research and Technology (RCAAP Project by FCT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"other"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1759428316","display_name":null,"funder_award_id":"UIDP/00408/2020","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G2089664753","display_name":null,"funder_award_id":"UIDB/00408/2020","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G3918034096","display_name":null,"funder_award_id":"H2020-ICT-643964","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G4528900011","display_name":null,"funder_award_id":"UIDB/50014/2020","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G933579255","display_name":null,"funder_award_id":"PTDC/EEISCR/6970/2014","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W69510097","https://openalex.org/W89823361","https://openalex.org/W200233886","https://openalex.org/W841014437","https://openalex.org/W1548134937","https://openalex.org/W1599740911","https://openalex.org/W1967239133","https://openalex.org/W1970218893","https://openalex.org/W1982703709","https://openalex.org/W2006088692","https://openalex.org/W2011430131","https://openalex.org/W2024143985","https://openalex.org/W2051929999","https://openalex.org/W2055899630","https://openalex.org/W2081930221","https://openalex.org/W2092880969","https://openalex.org/W2093931624","https://openalex.org/W2103903744","https://openalex.org/W2108234281","https://openalex.org/W2111044311","https://openalex.org/W2114792961","https://openalex.org/W2117608012","https://openalex.org/W2120048439","https://openalex.org/W2121016876","https://openalex.org/W2127230663","https://openalex.org/W2132069633","https://openalex.org/W2138196010","https://openalex.org/W2140431670","https://openalex.org/W2141287548","https://openalex.org/W2147717514","https://openalex.org/W2153621639","https://openalex.org/W2156468933","https://openalex.org/W2159683766","https://openalex.org/W2166588423","https://openalex.org/W2207690983","https://openalex.org/W2296597603","https://openalex.org/W2407551165","https://openalex.org/W2432359906","https://openalex.org/W2612457169","https://openalex.org/W2741506553","https://openalex.org/W2889470119","https://openalex.org/W2905575949","https://openalex.org/W2951070849","https://openalex.org/W6602837329","https://openalex.org/W6603735688","https://openalex.org/W6608138927","https://openalex.org/W6679663036","https://openalex.org/W6697520301","https://openalex.org/W6741902625"],"related_works":["https://openalex.org/W3144870715","https://openalex.org/W3142319788","https://openalex.org/W2587188779","https://openalex.org/W3132870970","https://openalex.org/W2943088381","https://openalex.org/W4385804830","https://openalex.org/W2144348063","https://openalex.org/W4296125805","https://openalex.org/W2074021203","https://openalex.org/W2380556100"],"abstract_inverted_index":{"The":[0],"vast":[1],"datasets":[2],"produced":[3],"in":[4,99,151],"human":[5,62],"genomics":[6],"must":[7],"be":[8],"efficiently":[9],"stored,":[10],"transferred,":[11],"and":[12,18,79,85,112,127,154,173],"processed":[13],"while":[14,45],"prioritizing":[15],"storage":[16,101],"space":[17,110],"restore":[19,113],"performance.":[20,89],"Balancing":[21],"these":[22],"two":[23],"properties":[24],"becomes":[25],"challenging":[26],"when":[27],"resorting":[28],"to":[29,51,56,94],"traditional":[30],"data":[31,41,156,168],"compression":[32],"techniques.":[33],"In":[34,105],"fact,":[35],"specialized":[36,128,149],"algorithms":[37],"for":[38,73,77,130],"compressing":[39],"sequencing":[40,132],"favor":[42],"the":[43,59,100,119,141,147,162],"former,":[44],"large":[46],"genome":[47,131],"repositories":[48],"widely":[49],"resort":[50],"generic":[52],"compressors":[53],"(e.g.,":[54],"GZIP)":[55],"benefit":[57],"from":[58],"latter.":[60],"Notably,":[61],"beings":[63],"have":[64],"approximately":[65],"99.9":[66],"percent":[67,139],"of":[68,103,140,144],"DNA":[69],"sequence":[70],"similarity,":[71],"vouching":[72],"an":[74],"excellent":[75],"opportunity":[76],"deduplication":[78,92,126],"its":[80],"assets:":[81],"leveraging":[82],"inter-file":[83],"similarity":[84],"achieving":[86],"higher":[87],"read":[88],"However,":[90],"identity-based":[91],"fails":[93],"provide":[95],"a":[96],"satisfactory":[97],"reduction":[98,142],"requirements":[102],"genomes.":[104],"this":[106,152],"article,":[107],"we":[108],"balance":[109],"savings":[111],"performance":[114],"by":[115],"proposing":[116],"\\sf":[117],"GenoDedupGenoDedup,":[118],"first":[120],"method":[121],"that":[122],"integrates":[123],"efficient":[124],"similarity-based":[125],"delta-encoding":[129],"data.":[133],"Our":[134],"solution":[135],"currently":[136],"achieves":[137],"67.8":[138],"gains":[143],"SPRING":[145,172],"(i.e.,":[146,161],"best":[148],"tool":[150],"metric)":[153],"restores":[155,167],"1.62\u00d71.62\u00d7":[157],"faster":[158,170],"than":[159,171,178],"SeqDB":[160],"fastest":[163],"competitor).":[164],"Additionally,":[165],"GenoDedupGenoDedup":[166],"9.96\u00d79.96\u00d7":[169],"compresses":[174],"files":[175],"2.05\u00d72.05\u00d7":[176],"more":[177],"SeqDB.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-08T08:50:53.379069","created_date":"2025-10-10T00:00:00"}
