{"id":"https://openalex.org/W2794580638","doi":"https://doi.org/10.1186/s12859-018-2080-y","title":"Alignment-free clustering of large data sets of unannotated protein conserved regions using minhashing","display_name":"Alignment-free clustering of large data sets of unannotated protein conserved regions using minhashing","publication_year":2018,"publication_date":"2018-03-05","ids":{"openalex":"https://openalex.org/W2794580638","doi":"https://doi.org/10.1186/s12859-018-2080-y","mag":"2794580638","pmid":"https://pubmed.ncbi.nlm.nih.gov/29506470"},"language":"en","primary_location":{"id":"doi:10.1186/s12859-018-2080-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-018-2080-y","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1186/s12859-018-2080-y","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041969204","display_name":"Armen Abnousi","orcid":"https://orcid.org/0000-0003-1822-0928"},"institutions":[{"id":"https://openalex.org/I170632171","display_name":"Washington State University Spokane","ror":"https://ror.org/04vfs5h36","country_code":"US","type":"education","lineage":["https://openalex.org/I170632171","https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Armen Abnousi","raw_affiliation_strings":["School of EECS, Washington State University, 355 NE Spokane St, Pullman, 99164, USA. aabnousi@eecs.wsu.edu","School of EECS, Washington State University, 355 NE Spokane St, Pullman, 99164, USA"],"raw_orcid":"https://orcid.org/0000-0003-1822-0928","affiliations":[{"raw_affiliation_string":"School of EECS, Washington State University, 355 NE Spokane St, Pullman, 99164, USA. aabnousi@eecs.wsu.edu","institution_ids":["https://openalex.org/I170632171"]},{"raw_affiliation_string":"School of EECS, Washington State University, 355 NE Spokane St, Pullman, 99164, USA","institution_ids":["https://openalex.org/I170632171"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037467986","display_name":"Shira L. Broschat","orcid":"https://orcid.org/0000-0001-9894-7371"},"institutions":[{"id":"https://openalex.org/I170632171","display_name":"Washington State University Spokane","ror":"https://ror.org/04vfs5h36","country_code":"US","type":"education","lineage":["https://openalex.org/I170632171","https://openalex.org/I72951846"]},{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shira L. Broschat","raw_affiliation_strings":["Department of Veterinary Microbiology and Pathology, Washington State University, Pullman, 99164, USA","Paul G. Allen School for Global Animal Health, Washington State University, Pullman, 99164, USA","School of EECS, Washington State University, 355 NE Spokane St, Pullman, 99164, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Veterinary Microbiology and Pathology, Washington State University, Pullman, 99164, USA","institution_ids":["https://openalex.org/I72951846"]},{"raw_affiliation_string":"Paul G. Allen School for Global Animal Health, Washington State University, Pullman, 99164, USA","institution_ids":["https://openalex.org/I72951846"]},{"raw_affiliation_string":"School of EECS, Washington State University, 355 NE Spokane St, Pullman, 99164, USA","institution_ids":["https://openalex.org/I170632171"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103010775","display_name":"Ananth Kalyanaraman","orcid":"https://orcid.org/0000-0003-3495-2264"},"institutions":[{"id":"https://openalex.org/I170632171","display_name":"Washington State University Spokane","ror":"https://ror.org/04vfs5h36","country_code":"US","type":"education","lineage":["https://openalex.org/I170632171","https://openalex.org/I72951846"]},{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ananth Kalyanaraman","raw_affiliation_strings":["Paul G. Allen School for Global Animal Health, Washington State University, Pullman, 99164, USA","School of EECS, Washington State University, 355 NE Spokane St, Pullman, 99164, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Paul G. Allen School for Global Animal Health, Washington State University, Pullman, 99164, USA","institution_ids":["https://openalex.org/I72951846"]},{"raw_affiliation_string":"School of EECS, Washington State University, 355 NE Spokane St, Pullman, 99164, USA","institution_ids":["https://openalex.org/I170632171"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041969204"],"corresponding_institution_ids":["https://openalex.org/I170632171"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":0.1856,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.5243771,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"19","issue":"1","first_page":"83","last_page":"83"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.37389999628067017,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.37389999628067017,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.19830000400543213,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.08590000122785568,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.6475879549980164},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6297221183776855},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.5252346992492676},{"id":"https://openalex.org/keywords/conserved-sequence","display_name":"Conserved sequence","score":0.45740583539009094},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.4510825574398041},{"id":"https://openalex.org/keywords/sequence-alignment","display_name":"Sequence alignment","score":0.4206914007663727},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.3911067843437195},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3736079931259155},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3209728002548218},{"id":"https://openalex.org/keywords/base-sequence","display_name":"Base sequence","score":0.24770775437355042},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.21366319060325623},{"id":"https://openalex.org/keywords/peptide-sequence","display_name":"Peptide sequence","score":0.14711320400238037},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.11128941178321838},{"id":"https://openalex.org/keywords/gene-expression","display_name":"Gene expression","score":0.08168572187423706}],"concepts":[{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.6475879549980164},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6297221183776855},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.5252346992492676},{"id":"https://openalex.org/C199216141","wikidata":"https://www.wikidata.org/wiki/Q4995178","display_name":"Conserved sequence","level":4,"score":0.45740583539009094},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.4510825574398041},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.4206914007663727},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.3911067843437195},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3736079931259155},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3209728002548218},{"id":"https://openalex.org/C3017666073","wikidata":"https://www.wikidata.org/wiki/Q1764062","display_name":"Base sequence","level":3,"score":0.24770775437355042},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.21366319060325623},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.14711320400238037},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.11128941178321838},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.08168572187423706}],"mesh":[{"descriptor_ui":"D000072417","descriptor_name":"Protein Domains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000072417","descriptor_name":"Protein Domains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000072417","descriptor_name":"Protein Domains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012281","descriptor_name":"Rickettsia","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D012281","descriptor_name":"Rickettsia","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D012281","descriptor_name":"Rickettsia","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D058977","descriptor_name":"Molecular Sequence Annotation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D058977","descriptor_name":"Molecular Sequence Annotation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D058977","descriptor_name":"Molecular Sequence Annotation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":5,"locations":[{"id":"doi:10.1186/s12859-018-2080-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-018-2080-y","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:29506470","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/29506470","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:aa8bde82f49447489d855b62c8b337bd","is_oa":true,"landing_page_url":"https://doaj.org/article/aa8bde82f49447489d855b62c8b337bd","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 19, Iss 1, Pp 1-18 (2018)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:5838936","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/5838936","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"},{"id":"pmh:oai:research.libraries.wsu.edu:2376/17909","is_oa":true,"landing_page_url":"http://hdl.handle.net/2376/17909","pdf_url":null,"source":{"id":"https://openalex.org/S4377196954","display_name":"Research Exchange (Washington State University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I72951846","host_organization_name":"Washington State University","host_organization_lineage":["https://openalex.org/I72951846"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":{"id":"doi:10.1186/s12859-018-2080-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-018-2080-y","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4000000059604645}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W67385939","https://openalex.org/W103309654","https://openalex.org/W1502916507","https://openalex.org/W1519039476","https://openalex.org/W1534345961","https://openalex.org/W1565276997","https://openalex.org/W1604983895","https://openalex.org/W1968363797","https://openalex.org/W1977922844","https://openalex.org/W1988746124","https://openalex.org/W2002932694","https://openalex.org/W2004622806","https://openalex.org/W2007745123","https://openalex.org/W2029954707","https://openalex.org/W2037275405","https://openalex.org/W2042718573","https://openalex.org/W2055043387","https://openalex.org/W2058292646","https://openalex.org/W2066090568","https://openalex.org/W2066737015","https://openalex.org/W2074231493","https://openalex.org/W2081193615","https://openalex.org/W2087064593","https://openalex.org/W2088574556","https://openalex.org/W2109867978","https://openalex.org/W2127648442","https://openalex.org/W2128899444","https://openalex.org/W2129448726","https://openalex.org/W2131681506","https://openalex.org/W2132069633","https://openalex.org/W2134875617","https://openalex.org/W2147717514","https://openalex.org/W2152565070","https://openalex.org/W2154139219","https://openalex.org/W2157190239","https://openalex.org/W2158714788","https://openalex.org/W2166837162","https://openalex.org/W2170747616","https://openalex.org/W2173213060","https://openalex.org/W2224056471","https://openalex.org/W2282703641","https://openalex.org/W2514476306","https://openalex.org/W2540100482","https://openalex.org/W2558632230","https://openalex.org/W2592535873","https://openalex.org/W3099768174","https://openalex.org/W4210702584","https://openalex.org/W4245970816","https://openalex.org/W6629956336"],"related_works":["https://openalex.org/W2181549235","https://openalex.org/W1973727281","https://openalex.org/W2046212479","https://openalex.org/W3209520725","https://openalex.org/W2003319012","https://openalex.org/W2031006065","https://openalex.org/W2044507120","https://openalex.org/W2514476306","https://openalex.org/W2023479323","https://openalex.org/W2143582965"],"abstract_inverted_index":{"BACKGROUND:":[0],"Clustering":[1],"of":[2,6,15,22,30,52,145,169,216],"protein":[3,36,54,77,243],"sequences":[4,37,78,153,171],"is":[5,20,220],"key":[7],"importance":[8],"in":[9,154],"predicting":[10],"the":[11,28,115,136,143,146,152,176,195],"structure":[12],"and":[13,19,106],"function":[14],"newly":[16],"sequenced":[17],"proteins":[18],"also":[21],"use":[23],"for":[24,75,90,103,150,165,232,241],"their":[25],"annotation.":[26],"With":[27],"advent":[29],"multiple":[31],"high-throughput":[32],"sequencing":[33],"technologies,":[34],"new":[35,206],"are":[38],"becoming":[39],"available":[40],"at":[41],"an":[42,70],"extraordinary":[43],"rate.":[44],"The":[45,83,205],"rapid":[46],"growth":[47],"rate":[48],"has":[49],"impeded":[50],"deployment":[51],"existing":[53,128],"clustering/annotation":[55],"tools":[56],"which":[57,151],"depend":[58],"largely":[59],"on":[60],"pairwise":[61,201],"sequence":[62],"alignment.":[63],"RESULTS:":[64],"In":[65,131],"this":[66],"paper,":[67],"we":[68,133],"propose":[69],"alignment-free":[71],"clustering":[72,207],"approach,":[73],"coreClust,":[74],"annotating":[76,242],"using":[79],"detected":[80],"conserved":[81,93,217,234],"regions.":[82,94,218],"proposed":[84],"algorithm":[85,111,141,181,208],"uses":[86],"Min-Wise":[87,95],"Independent":[88,96],"Hashing":[89,97],"identifying":[91],"similar":[92,159],"works":[98],"by":[99,139,179,198],"generating":[100],"a":[101,155,158,166,183,199,221,237],"(w,c)-sketch":[102],"each":[104],"document":[105],"comparing":[107],"these":[108],"sketches.":[109],"Our":[110],"fits":[112],"well":[113],"within":[114],"MapReduce":[116],"framework,":[117],"permitting":[118],"scalability.":[119],"We":[120,162],"show":[121,134,163],"that":[122,135,164,224],"coreClust":[123],"generates":[124],"results":[125],"comparable":[126],"to":[127,194,212],"known":[129],"methods.":[130],"particular,":[132],"clusters":[137,177,196,215],"generated":[138,178,197],"our":[140,180,189,228],"capture":[142],"subfamilies":[144],"Pfam":[147],"domain":[148,160,174],"families":[149],"cluster":[156],"have":[157],"architecture.":[161],"data":[167],"set":[168],"90,000":[170],"(about":[172],"250,000":[173],"regions),":[175],"give":[182],"75%":[184],"average":[185],"weighted":[186],"F1":[187],"score,":[188],"accuracy":[190],"metric,":[191],"when":[192,225],"compared":[193],"semi-exhaustive":[200],"alignment":[202],"algorithm.":[203],"CONCLUSIONS:":[204],"can":[209],"be":[210],"used":[211],"generate":[213],"meaningful":[214],"It":[219],"scalable":[222],"method":[223],"paired":[226],"with":[227],"prior":[229],"work,":[230],"NADDA":[231],"detecting":[233],"regions,":[235],"provides":[236],"complete":[238],"end-to-end":[239],"pipeline":[240],"sequences.":[244]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
