{"id":"https://openalex.org/W4387346564","doi":"https://doi.org/10.1145/3584371.3612946","title":"Optimizing K-Mer Fingerprint Generation for Machine Learning","display_name":"Optimizing K-Mer Fingerprint Generation for Machine Learning","publication_year":2023,"publication_date":"2023-09-03","ids":{"openalex":"https://openalex.org/W4387346564","doi":"https://doi.org/10.1145/3584371.3612946"},"language":"en","primary_location":{"id":"doi:10.1145/3584371.3612946","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3584371.3612946","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3584371.3612946","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3584371.3612946","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081800708","display_name":"Cory Kromer-Edwards","orcid":"https://orcid.org/0000-0002-5804-8520"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cory Kromer-Edwards","raw_affiliation_strings":["Machine Translation, Ontario, United States"],"raw_orcid":"https://orcid.org/0000-0002-5804-8520","affiliations":[{"raw_affiliation_string":"Machine Translation, Ontario, United States","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5081800708"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1499,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.5945001,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7903000116348267},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7673289179801941},{"id":"https://openalex.org/keywords/fingerprint","display_name":"Fingerprint (computing)","score":0.6516237854957581},{"id":"https://openalex.org/keywords/k-mer","display_name":"k-mer","score":0.62618488073349},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39771294593811035},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3930931091308594},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3786677420139313},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.3749257028102875},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35410213470458984},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3492125868797302},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34870103001594543}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7903000116348267},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7673289179801941},{"id":"https://openalex.org/C2777826928","wikidata":"https://www.wikidata.org/wiki/Q3745713","display_name":"Fingerprint (computing)","level":2,"score":0.6516237854957581},{"id":"https://openalex.org/C2279292","wikidata":"https://www.wikidata.org/wiki/Q6322851","display_name":"k-mer","level":4,"score":0.62618488073349},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39771294593811035},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3930931091308594},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3786677420139313},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.3749257028102875},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35410213470458984},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3492125868797302},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34870103001594543},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3584371.3612946","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3584371.3612946","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3584371.3612946","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3584371.3612946","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3584371.3612946","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3584371.3612946","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387346564.pdf"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W2096128575","https://openalex.org/W2129916363","https://openalex.org/W2582377823","https://openalex.org/W2583363792","https://openalex.org/W2597444305","https://openalex.org/W2800388620","https://openalex.org/W2950064778","https://openalex.org/W2963173114","https://openalex.org/W3149549685","https://openalex.org/W3164636936","https://openalex.org/W3191207131","https://openalex.org/W3192236788","https://openalex.org/W4313525427","https://openalex.org/W4385478232","https://openalex.org/W4385484764"],"related_works":["https://openalex.org/W1509211761","https://openalex.org/W2391299576","https://openalex.org/W2133693067","https://openalex.org/W1531488649","https://openalex.org/W3037767301","https://openalex.org/W1585350690","https://openalex.org/W2366027386","https://openalex.org/W2582456645","https://openalex.org/W2479014312","https://openalex.org/W1583465708"],"abstract_inverted_index":{"With":[0],"the":[1],"increasing":[2],"availability":[3],"of":[4,60,110],"genomic":[5],"data":[6],"obtained":[7],"through":[8],"Whole-Genome":[9],"Sequencing":[10],"(WGS),":[11],"Machine":[12],"Learning":[13],"(ML)":[14],"algorithms":[15],"are":[16],"being":[17],"used":[18,40],"to":[19,34,50,103],"analyze":[20],"this":[21],"data.":[22],"However,":[23,44],"processing":[24],"large":[25],"datasets":[26],"or":[27],"files":[28],"poses":[29],"challenges.":[30],"One":[31],"approach":[32],"is":[33],"count":[35],"K-Mers,":[36],"which":[37,90],"has":[38,65],"been":[39],"in":[41,68],"ML":[42],"studies.":[43],"larger":[45],"K-Mer":[46,87],"sizes":[47,62],"may":[48],"lead":[49],"decreased":[51],"accuracy":[52],"and":[53,71],"training":[54],"difficulties.":[55],"Alternatively,":[56],"combining":[57],"multiple":[58],"K-Mers":[59],"smaller":[61],"into":[63],"fingerprints":[64],"shown":[66],"promise":[67],"predicting":[69],"species":[70],"antibiotic":[72],"resistance.":[73],"This":[74],"study":[75],"compares":[76],"existing":[77],"fingerprint":[78],"generation":[79],"techniques":[80],"with":[81],"a":[82,92,108],"new":[83],"algorithm":[84],"called":[85],"GPU":[86,93],"Fingerprinting":[88],"(GKF),":[89],"utilizes":[91],"for":[94],"parallel":[95],"processing.":[96],"GKF":[97],"demonstrates":[98],"similar":[99],"memory":[100],"utilization":[101],"compared":[102],"other":[104],"approaches":[105],"but":[106],"achieves":[107],"speedup":[109],"5,546X.":[111]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
