{"id":"https://openalex.org/W4312113617","doi":"https://doi.org/10.1186/s12859-022-05105-y","title":"Unsupervised outlier detection applied to SARS-CoV-2 nucleotide sequences can identify sequences of common variants and other variants of interest","display_name":"Unsupervised outlier detection applied to SARS-CoV-2 nucleotide sequences can identify sequences of common variants and other variants of interest","publication_year":2022,"publication_date":"2022-12-19","ids":{"openalex":"https://openalex.org/W4312113617","doi":"https://doi.org/10.1186/s12859-022-05105-y","pmid":"https://pubmed.ncbi.nlm.nih.gov/36536276"},"language":"en","primary_location":{"id":"doi:10.1186/s12859-022-05105-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-022-05105-y","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/s12859-022-05105-y","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/s12859-022-05105-y","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010215905","display_name":"Georg Hahn","orcid":"https://orcid.org/0000-0001-6008-2720"},"institutions":[{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Georg Hahn","raw_affiliation_strings":["Department of Biostatistics, T.H. Chan School of Public Health, Harvard University, Boston, MA, 02115, USA. ghahn@hsph.harvard.edu","Department of Biostatistics, T.H. Chan School of Public Health, Harvard University, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Biostatistics, T.H. Chan School of Public Health, Harvard University, Boston, MA, 02115, USA. ghahn@hsph.harvard.edu","institution_ids":["https://openalex.org/I136199984"]},{"raw_affiliation_string":"Department of Biostatistics, T.H. Chan School of Public Health, Harvard University, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I136199984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100434238","display_name":"Sanghun Lee","orcid":"https://orcid.org/0000-0002-0573-9555"},"institutions":[{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I89015989","display_name":"Dankook University","ror":"https://ror.org/058pdbn81","country_code":"KR","type":"education","lineage":["https://openalex.org/I89015989"]}],"countries":["KR","US"],"is_corresponding":false,"raw_author_name":"Sanghun Lee","raw_affiliation_strings":["Department of Biostatistics, T.H. Chan School of Public Health, Harvard University, Boston, MA, 02115, USA","Department of Medical Consilience, Graduate School, Dankook University, Yongin, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Biostatistics, T.H. Chan School of Public Health, Harvard University, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I136199984"]},{"raw_affiliation_string":"Department of Medical Consilience, Graduate School, Dankook University, Yongin, South Korea","institution_ids":["https://openalex.org/I89015989"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019016063","display_name":"Dmitry Prokopenko","orcid":"https://orcid.org/0000-0002-1844-5652"},"institutions":[{"id":"https://openalex.org/I4210087915","display_name":"Massachusetts General Hospital","ror":"https://ror.org/002pd6e78","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210087915","https://openalex.org/I48633490"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dmitry Prokopenko","raw_affiliation_strings":["Genetics and Aging Research Unit, Department of Neurology, McCance Center for Brain Health, Massachusetts General Hospital, Boston, MA, 02114, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Genetics and Aging Research Unit, Department of Neurology, McCance Center for Brain Health, Massachusetts General Hospital, Boston, MA, 02114, USA","institution_ids":["https://openalex.org/I4210087915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038221509","display_name":"Jonathan Abraham","orcid":"https://orcid.org/0000-0002-7937-3920"},"institutions":[{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I4210115614","display_name":"Boston VA Research Institute","ror":"https://ror.org/02419mc73","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210115614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan Abraham","raw_affiliation_strings":["Department of Microbiology, Harvard Medical School, Blavatnik Institute, 77 Avenue Louis Pasteur, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Microbiology, Harvard Medical School, Blavatnik Institute, 77 Avenue Louis Pasteur, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I4210115614","https://openalex.org/I136199984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003063033","display_name":"Tanya Novak","orcid":"https://orcid.org/0000-0002-7115-7545"},"institutions":[{"id":"https://openalex.org/I1288882113","display_name":"Boston Children's Hospital","ror":"https://ror.org/00dvg7y05","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1288882113"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tanya Novak","raw_affiliation_strings":["Department of Anesthesiology, Critical Care and Pain Medicine, Boston Children's Hospital, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Anesthesiology, Critical Care and Pain Medicine, Boston Children's Hospital, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I1288882113"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080474017","display_name":"Julian Hecker","orcid":"https://orcid.org/0000-0001-7918-089X"},"institutions":[{"id":"https://openalex.org/I1283280774","display_name":"Brigham and Women's Hospital","ror":"https://ror.org/04b6nzv94","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283280774","https://openalex.org/I48633490"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Julian Hecker","raw_affiliation_strings":["Channing Division of Network Medicine, Department of Medicine, Brigham and Women's Hospital, Boston, MA, 02115, USA","Harvard Medical School, Harvard University, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Channing Division of Network Medicine, Department of Medicine, Brigham and Women's Hospital, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I1283280774"]},{"raw_affiliation_string":"Harvard Medical School, Harvard University, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I136199984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100602804","display_name":"Michael Cho","orcid":"https://orcid.org/0000-0002-8191-8045"},"institutions":[{"id":"https://openalex.org/I1283280774","display_name":"Brigham and Women's Hospital","ror":"https://ror.org/04b6nzv94","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283280774","https://openalex.org/I48633490"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Cho","raw_affiliation_strings":["Channing Division of Network Medicine, Department of Medicine, Brigham and Women's Hospital, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Channing Division of Network Medicine, Department of Medicine, Brigham and Women's Hospital, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I1283280774"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071330243","display_name":"Surender Khurana","orcid":"https://orcid.org/0000-0002-0593-7965"},"institutions":[{"id":"https://openalex.org/I1320320070","display_name":"United States Food and Drug Administration","ror":"https://ror.org/034xvzb47","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1320320070"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Surender Khurana","raw_affiliation_strings":["Food and Drug Administration, Silver Spring, MD, 20993, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Food and Drug Administration, Silver Spring, MD, 20993, USA","institution_ids":["https://openalex.org/I1320320070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091029647","display_name":"Lindsey R. Baden","orcid":null},"institutions":[{"id":"https://openalex.org/I1283280774","display_name":"Brigham and Women's Hospital","ror":"https://ror.org/04b6nzv94","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283280774","https://openalex.org/I48633490"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lindsey R. Baden","raw_affiliation_strings":["Division of Infectious Diseases, Harvard Medical School, Brigham and Women's Hospital, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Division of Infectious Diseases, Harvard Medical School, Brigham and Women's Hospital, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I1283280774","https://openalex.org/I136199984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072931455","display_name":"Adrienne G. Randolph","orcid":"https://orcid.org/0000-0002-3084-3071"},"institutions":[{"id":"https://openalex.org/I1288882113","display_name":"Boston Children's Hospital","ror":"https://ror.org/00dvg7y05","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1288882113"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adrienne G. Randolph","raw_affiliation_strings":["Department of Anesthesiology, Critical Care and Pain Medicine, Boston Children's Hospital, Boston, MA, 02115, USA","Harvard Medical School, Harvard University, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Anesthesiology, Critical Care and Pain Medicine, Boston Children's Hospital, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I1288882113"]},{"raw_affiliation_string":"Harvard Medical School, Harvard University, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I136199984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073763243","display_name":"Scott T. Weiss","orcid":"https://orcid.org/0000-0001-7196-303X"},"institutions":[{"id":"https://openalex.org/I1283280774","display_name":"Brigham and Women's Hospital","ror":"https://ror.org/04b6nzv94","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283280774","https://openalex.org/I48633490"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott T. Weiss","raw_affiliation_strings":["Channing Division of Network Medicine, Department of Medicine, Brigham and Women's Hospital, Boston, MA, 02115, USA","Harvard Medical School, Harvard University, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Channing Division of Network Medicine, Department of Medicine, Brigham and Women's Hospital, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I1283280774"]},{"raw_affiliation_string":"Harvard Medical School, Harvard University, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I136199984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002736890","display_name":"Christoph Lange","orcid":"https://orcid.org/0000-0003-2620-1030"},"institutions":[{"id":"https://openalex.org/I1283280774","display_name":"Brigham and Women's Hospital","ror":"https://ror.org/04b6nzv94","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283280774","https://openalex.org/I48633490"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christoph Lange","raw_affiliation_strings":["Channing Division of Network Medicine, Department of Medicine, Brigham and Women's Hospital, Boston, MA, 02115, USA","Department of Biostatistics, T.H. Chan School of Public Health, Harvard University, Boston, MA, 02115, USA","Harvard Medical School, Harvard University, Boston, MA, 02115, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Channing Division of Network Medicine, Department of Medicine, Brigham and Women's Hospital, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I1283280774"]},{"raw_affiliation_string":"Department of Biostatistics, T.H. Chan School of Public Health, Harvard University, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I136199984"]},{"raw_affiliation_string":"Harvard Medical School, Harvard University, Boston, MA, 02115, USA","institution_ids":["https://openalex.org/I136199984"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5010215905"],"corresponding_institution_ids":["https://openalex.org/I136199984"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":0.1008,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.46278189,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"23","issue":"1","first_page":"547","last_page":"547"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10118","display_name":"SARS-CoV-2 and COVID-19 Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10118","display_name":"SARS-CoV-2 and COVID-19 Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11135","display_name":"Virology and Viral Diseases","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12576","display_name":"vaccines and immunoinformatics approaches","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.6511611938476562},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.6104446053504944},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.5686630606651306},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.566014289855957},{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.5629399418830872},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.5081648826599121},{"id":"https://openalex.org/keywords/clade","display_name":"Clade","score":0.4692169427871704},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.46805188059806824},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.4599956274032593},{"id":"https://openalex.org/keywords/single-nucleotide-polymorphism","display_name":"Single-nucleotide polymorphism","score":0.42968130111694336},{"id":"https://openalex.org/keywords/lineage","display_name":"Lineage (genetic)","score":0.4106796383857727},{"id":"https://openalex.org/keywords/phylogenetic-tree","display_name":"Phylogenetic tree","score":0.3403046727180481},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.2833011746406555},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.26055002212524414},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.24066251516342163},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.17845898866653442},{"id":"https://openalex.org/keywords/genotype","display_name":"Genotype","score":0.1271989643573761}],"concepts":[{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.6511611938476562},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.6104446053504944},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.5686630606651306},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.566014289855957},{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.5629399418830872},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.5081648826599121},{"id":"https://openalex.org/C44465124","wikidata":"https://www.wikidata.org/wiki/Q713623","display_name":"Clade","level":4,"score":0.4692169427871704},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.46805188059806824},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.4599956274032593},{"id":"https://openalex.org/C153209595","wikidata":"https://www.wikidata.org/wiki/Q501128","display_name":"Single-nucleotide polymorphism","level":4,"score":0.42968130111694336},{"id":"https://openalex.org/C2776817793","wikidata":"https://www.wikidata.org/wiki/Q6553369","display_name":"Lineage (genetic)","level":3,"score":0.4106796383857727},{"id":"https://openalex.org/C193252679","wikidata":"https://www.wikidata.org/wiki/Q242125","display_name":"Phylogenetic tree","level":3,"score":0.3403046727180481},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.2833011746406555},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.26055002212524414},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.24066251516342163},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.17845898866653442},{"id":"https://openalex.org/C135763542","wikidata":"https://www.wikidata.org/wiki/Q106016","display_name":"Genotype","level":3,"score":0.1271989643573761},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0}],"mesh":[{"descriptor_ui":"D000086382","descriptor_name":"COVID-19","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000086382","descriptor_name":"COVID-19","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000086382","descriptor_name":"COVID-19","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000086402","descriptor_name":"SARS-CoV-2","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000086402","descriptor_name":"SARS-CoV-2","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000086402","descriptor_name":"SARS-CoV-2","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1186/s12859-022-05105-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-022-05105-y","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/s12859-022-05105-y","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:36536276","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36536276","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:428c807d66334f48a8d811a6960611da","is_oa":true,"landing_page_url":"https://doaj.org/article/428c807d66334f48a8d811a6960611da","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 23, Iss 1, Pp 1-18 (2022)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:9761049","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/9761049","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s12859-022-05105-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-022-05105-y","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/s12859-022-05105-y","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.8399999737739563,"display_name":"Good health and well-being","id":"https://metadata.un.org/sdg/3"}],"awards":[{"id":"https://openalex.org/G1636473632","display_name":null,"funder_award_id":"P01HL120839","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G2101732747","display_name":null,"funder_award_id":"U01HL089897","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G2200354983","display_name":null,"funder_award_id":"GRFP 1745302","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3089130608","display_name":null,"funder_award_id":"2033046","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4481455470","display_name":null,"funder_award_id":"ES002109","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G6237335845","display_name":null,"funder_award_id":"P01HL132825","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G6804949641","display_name":null,"funder_award_id":"1745302","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G784628591","display_name":null,"funder_award_id":"P30-ES002109","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G85989041","display_name":null,"funder_award_id":"U01HL089856","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4312113617.pdf","grobid_xml":"https://content.openalex.org/works/W4312113617.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1970377488","https://openalex.org/W2127774996","https://openalex.org/W2302778906","https://openalex.org/W2538584349","https://openalex.org/W2587970647","https://openalex.org/W2592682870","https://openalex.org/W2605343262","https://openalex.org/W3023868120","https://openalex.org/W3040012249","https://openalex.org/W3087003652","https://openalex.org/W3118514422","https://openalex.org/W3175100919","https://openalex.org/W4206763318","https://openalex.org/W4225660627","https://openalex.org/W4284886202","https://openalex.org/W4293169002","https://openalex.org/W6606066242"],"related_works":["https://openalex.org/W4254879869","https://openalex.org/W3022576529","https://openalex.org/W2628526247","https://openalex.org/W2050399337","https://openalex.org/W4284881927","https://openalex.org/W4285093259","https://openalex.org/W1989890420","https://openalex.org/W4385295002","https://openalex.org/W4308566686","https://openalex.org/W2173655985"],"abstract_inverted_index":{"As":[0,66],"of":[1,43,52,55,84,103,111,141],"June":[2],"2022,":[3],"the":[4,20,38,41,44,50,59,70,78,120,179],"GISAID":[5],"database":[6],"contains":[7],"more":[8],"than":[9],"11":[10],"million":[11],"SARS-CoV-2":[12,30,79],"genomes,":[13],"including":[14],"several":[15],"thousand":[16],"nucleotide":[17,56,101,134,139],"sequences":[18,57,85,102,112,140],"for":[19],"most":[21],"common":[22,104,142],"variants":[23,105,115,143,174],"such":[24,90],"as":[25,91,145,178],"delta":[26],"or":[27,94,148],"omicron.":[28],"These":[29],"strains":[31],"have":[32],"been":[33],"collected":[34],"from":[35,114],"patients":[36],"around":[37],"world":[39],"since":[40],"beginning":[42],"pandemic.":[45,121],"We":[46,136,160],"start":[47],"by":[48,123],"assessing":[49],"similarity":[51],"all":[53],"pairs":[54],"using":[58],"Jaccard":[60],"index":[61],"and":[62],"principal":[63],"component":[64],"analysis.":[65],"shown":[67],"previously":[68],"in":[69,82,109,129,175],"literature,":[71],"an":[72],"unsupervised":[73],"cluster":[74],"analysis":[75],"applied":[76],"to":[77,87,133,171],"genomes":[80],"results":[81],"clusters":[83,110],"according":[86],"certain":[88],"characteristics":[89],"their":[92,95],"strain":[93],"clade.":[96],"Importantly,":[97],"we":[98,126],"observe":[99],"that":[100,138,162],"are":[106,127],"often":[107],"outliers":[108],"stemming":[113],"identified":[116,152],"earlier":[117],"on":[118,155],"during":[119],"Motivated":[122],"this":[124],"finding,":[125],"interested":[128],"applying":[130],"outlier":[131,158,163],"detection":[132,164],"sequences.":[135],"demonstrate":[137],"(such":[144],"alpha,":[146],"delta,":[147],"omicron)":[149],"can":[150],"be":[151,166],"solely":[153],"based":[154],"a":[156,167],"statistical":[157],"criterion.":[159],"argue":[161],"might":[165],"useful":[168],"surveillance":[169],"tool":[170],"identify":[172],"emerging":[173],"real":[176],"time":[177],"pandemic":[180],"progresses.":[181]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
