{"id":"https://openalex.org/W4220987690","doi":"https://doi.org/10.3390/a15040107","title":"KMC3 and CHTKC: Best Scenarios, Deficiencies, and Challenges in High-Throughput Sequencing Data Analysis","display_name":"KMC3 and CHTKC: Best Scenarios, Deficiencies, and Challenges in High-Throughput Sequencing Data Analysis","publication_year":2022,"publication_date":"2022-03-24","ids":{"openalex":"https://openalex.org/W4220987690","doi":"https://doi.org/10.3390/a15040107"},"language":"en","primary_location":{"id":"doi:10.3390/a15040107","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a15040107","pdf_url":"https://www.mdpi.com/1999-4893/15/4/107/pdf?version=1648090135","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/15/4/107/pdf?version=1648090135","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074459001","display_name":"Deyou Tang","orcid":"https://orcid.org/0000-0002-3796-3965"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Deyou Tang","raw_affiliation_strings":["School of Software Engineering, South China University of Technology, Guangzhou 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Software Engineering, South China University of Technology, Guangzhou 510006, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063980714","display_name":"Daqiang Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daqiang Tan","raw_affiliation_strings":["School of Software Engineering, South China University of Technology, Guangzhou 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Software Engineering, South China University of Technology, Guangzhou 510006, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063309680","display_name":"Weihao Xiao","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihao Xiao","raw_affiliation_strings":["School of Software Engineering, South China University of Technology, Guangzhou 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Software Engineering, South China University of Technology, Guangzhou 510006, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044301786","display_name":"Jiabin Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiabin Lin","raw_affiliation_strings":["School of Software Engineering, South China University of Technology, Guangzhou 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Software Engineering, South China University of Technology, Guangzhou 510006, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103782401","display_name":"Juan Fu","orcid":"https://orcid.org/0009-0000-5684-6767"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Juan Fu","raw_affiliation_strings":["School of Medicine, South China University of Technology, Guangzhou 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Medicine, South China University of Technology, Guangzhou 510006, China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5074459001"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02332286,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"15","issue":"4","first_page":"107","last_page":"107"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8601166009902954},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8094046115875244},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5676391124725342},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5103239417076111},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4743824303150177},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4657355844974518},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4494721591472626},{"id":"https://openalex.org/keywords/k-mer","display_name":"k-mer","score":0.4485177993774414},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.442097544670105},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.21617501974105835},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.12772613763809204},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1271631121635437},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1113119125366211},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11027634143829346},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.10701650381088257}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8601166009902954},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8094046115875244},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5676391124725342},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5103239417076111},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4743824303150177},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4657355844974518},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4494721591472626},{"id":"https://openalex.org/C2279292","wikidata":"https://www.wikidata.org/wiki/Q6322851","display_name":"k-mer","level":4,"score":0.4485177993774414},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.442097544670105},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.21617501974105835},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.12772613763809204},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1271631121635437},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1113119125366211},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11027634143829346},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.10701650381088257},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/a15040107","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a15040107","pdf_url":"https://www.mdpi.com/1999-4893/15/4/107/pdf?version=1648090135","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:5a599a4ab9d74e0da7a0fda7759fc74f","is_oa":true,"landing_page_url":"https://doaj.org/article/5a599a4ab9d74e0da7a0fda7759fc74f","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 15, Iss 4, p 107 (2022)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/1999-4893/15/4/107/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/a15040107","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms; Volume 15; Issue 4; Pages: 107","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/a15040107","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a15040107","pdf_url":"https://www.mdpi.com/1999-4893/15/4/107/pdf?version=1648090135","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4220987690.pdf","grobid_xml":"https://content.openalex.org/works/W4220987690.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1606270358","https://openalex.org/W1954100204","https://openalex.org/W2027667941","https://openalex.org/W2037444377","https://openalex.org/W2057253402","https://openalex.org/W2076747312","https://openalex.org/W2096128575","https://openalex.org/W2097066660","https://openalex.org/W2097160089","https://openalex.org/W2103962160","https://openalex.org/W2125266506","https://openalex.org/W2159946869","https://openalex.org/W2163584430","https://openalex.org/W2171003081","https://openalex.org/W2188584922","https://openalex.org/W2299673938","https://openalex.org/W2411730464","https://openalex.org/W2581367156","https://openalex.org/W2583363792","https://openalex.org/W2597444305","https://openalex.org/W2612838847","https://openalex.org/W2796759716","https://openalex.org/W2897603717","https://openalex.org/W2897927784","https://openalex.org/W2901365579","https://openalex.org/W2914657230","https://openalex.org/W2949074212","https://openalex.org/W2952158800","https://openalex.org/W2986205472","https://openalex.org/W2998912266","https://openalex.org/W3027033297","https://openalex.org/W3131575902","https://openalex.org/W3142668023","https://openalex.org/W3216842221","https://openalex.org/W6732773109","https://openalex.org/W6756107926"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W2885125400","https://openalex.org/W2135248929","https://openalex.org/W2092690310","https://openalex.org/W1493624592","https://openalex.org/W3190662310","https://openalex.org/W2798440551","https://openalex.org/W1507878993"],"abstract_inverted_index":{"Background:":[0],"K-mer":[1],"frequency":[2],"counting":[3,22,26,116,176,198],"is":[4,66,87,97,142,160,172,192,206],"an":[5],"upstream":[6],"process":[7],"of":[8,93,138,188],"many":[9],"bioinformatics":[10],"data":[11],"analysis":[12],"workflows.":[13],"KMC3":[14,51,86,114,159],"and":[15,23,35,41,48,55,77,96,107,124,134,151,170,200,202],"CHTKC":[16,59,65,110,171],"are":[17,129],"the":[18,32,91,136,139,186,189,196],"representative":[19],"partition-based":[20],"k-mer":[21,25,197],"non-partition-based":[24],"algorithms,":[27],"respectively.":[28],"This":[29],"paper":[30],"evaluates":[31],"two":[33],"algorithms":[34,128],"presents":[36],"their":[37],"best":[38],"applicable":[39],"scenarios":[40],"potential":[42],"improvements":[43],"using":[44],"multiple":[45],"hardware":[46,168],"contexts":[47],"datasets.":[49],"Results:":[50],"uses":[52],"less":[53,88],"memory":[54],"runs":[56],"faster":[57],"than":[58,113],"on":[60,68,166,181],"a":[61],"regular":[62],"configuration":[63],"server.":[64],"efficient":[67,99],"high-performance":[69],"computing":[70,183],"platforms":[71],"with":[72,83,102,118],"high":[73,121],"available":[74],"memory,":[75],"multi-thread,":[76],"low":[78,104],"IO":[79,132,140,190,210],"bandwidth.":[80],"When":[81],"tested":[82],"various":[84],"datasets,":[85,120],"sensitive":[89],"to":[90],"number":[92],"distinct":[94],"k-mers":[95,155,177,205],"more":[98,161,173],"for":[100,163,175,194],"tasks":[101],"relatively":[103],"sequencing":[105,122],"quality":[106],"long":[108],"k-mer.":[109,126],"performs":[111],"better":[112],"in":[115,156,178,208],"assignments":[117],"large-scale":[119],"quality,":[123],"short":[125],"Both":[127],"affected":[130],"by":[131,149],"bandwidth,":[133],"decreasing":[135],"influence":[137,187],"bottleneck":[141,191],"critical":[143,207],"as":[144],"our":[145],"tests":[146],"show":[147],"improvement":[148],"filtering":[150,201],"compressing":[152,203],"consecutive":[153],"first-occurring":[154],"KMC3.":[157],"Conclusions:":[158],"competitive":[162,174],"running":[164],"counter":[165],"ordinary":[167],"resources,":[169],"super-scale":[179],"datasets":[180],"higher-performance":[182],"platforms.":[184],"Reducing":[185],"essential":[193],"optimizing":[195],"algorithm,":[199],"low-frequency":[204],"relieving":[209],"impact.":[211]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2022-04-03T00:00:00"}
