{"id":"https://openalex.org/W7141628184","doi":"https://doi.org/10.1371/journal.pcbi.1014125","title":"An improved dataset for predicting mammal infecting viruses from genetic sequence information","display_name":"An improved dataset for predicting mammal infecting viruses from genetic sequence information","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7141628184","doi":"https://doi.org/10.1371/journal.pcbi.1014125","pmid":"https://pubmed.ncbi.nlm.nih.gov/41894437"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.1014125","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1014125","pdf_url":null,"source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1371/journal.pcbi.1014125","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061717074","display_name":"Tyler Reddy","orcid":"https://orcid.org/0000-0003-2364-6157"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tyler Reddy","raw_affiliation_strings":["CAI-1: Applied Computer Science, Los Alamos National Laboratory, Los Alamos, New Mexico"],"raw_orcid":"https://orcid.org/0000-0003-2364-6157","affiliations":[{"raw_affiliation_string":"CAI-1: Applied Computer Science, Los Alamos National Laboratory, Los Alamos, New Mexico","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125183467","display_name":"Austin Schneider","orcid":null},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Austin Schneider","raw_affiliation_strings":["P-2: Applied And Fundamental Physics, Los Alamos National Laboratory, Los Alamos, New Mexico"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"P-2: Applied And Fundamental Physics, Los Alamos National Laboratory, Los Alamos, New Mexico","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067412813","display_name":"Aaron R. Hall","orcid":"https://orcid.org/0000-0002-6194-8060"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aaron R. Hall","raw_affiliation_strings":["CAI-1: Applied Computer Science, Los Alamos National Laboratory, Los Alamos, New Mexico"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CAI-1: Applied Computer Science, Los Alamos National Laboratory, Los Alamos, New Mexico","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130734019","display_name":"Adam Witmer","orcid":null},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adam Witmer","raw_affiliation_strings":["CAI-1: Applied Computer Science, Los Alamos National Laboratory, Los Alamos, New Mexico"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CAI-1: Applied Computer Science, Los Alamos National Laboratory, Los Alamos, New Mexico","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130798564","display_name":"Nick Hengartner","orcid":null},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nick Hengartner","raw_affiliation_strings":["T-6: Theoretical Biology and Biophysics, Los Alamos National Laboratory, Los Alamos, New Mexico"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"T-6: Theoretical Biology and Biophysics, Los Alamos National Laboratory, Los Alamos, New Mexico","institution_ids":["https://openalex.org/I1343871089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I1343871089"],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39852436,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":"3","first_page":"e1014125","last_page":"e1014125"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12492","display_name":"Zoonotic diseases and public health","score":0.7204999923706055,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T12492","display_name":"Zoonotic diseases and public health","score":0.7204999923706055,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.041999999433755875,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11048","display_name":"Bacteriophages and microbial interactions","score":0.017999999225139618,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.7470999956130981},{"id":"https://openalex.org/keywords/mammal","display_name":"Mammal","score":0.5662999749183655},{"id":"https://openalex.org/keywords/phylogenetic-tree","display_name":"Phylogenetic tree","score":0.5562999844551086},{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.4480000138282776},{"id":"https://openalex.org/keywords/host","display_name":"Host (biology)","score":0.42910000681877136},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.3619000017642975},{"id":"https://openalex.org/keywords/taxonomic-rank","display_name":"Taxonomic rank","score":0.3601999878883362},{"id":"https://openalex.org/keywords/non-human-primate","display_name":"Non human primate","score":0.3255000114440918}],"concepts":[{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.7470999956130981},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.5914999842643738},{"id":"https://openalex.org/C2778234026","wikidata":"https://www.wikidata.org/wiki/Q7377","display_name":"Mammal","level":2,"score":0.5662999749183655},{"id":"https://openalex.org/C193252679","wikidata":"https://www.wikidata.org/wiki/Q242125","display_name":"Phylogenetic tree","level":3,"score":0.5562999844551086},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5008999705314636},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.48010000586509705},{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.4480000138282776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44130000472068787},{"id":"https://openalex.org/C126831891","wikidata":"https://www.wikidata.org/wiki/Q221673","display_name":"Host (biology)","level":2,"score":0.42910000681877136},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.38999998569488525},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3840999901294708},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.3619000017642975},{"id":"https://openalex.org/C189592816","wikidata":"https://www.wikidata.org/wiki/Q427626","display_name":"Taxonomic rank","level":3,"score":0.3601999878883362},{"id":"https://openalex.org/C3020794687","wikidata":"https://www.wikidata.org/wiki/Q7380","display_name":"Non human primate","level":2,"score":0.3255000114440918},{"id":"https://openalex.org/C2780509455","wikidata":"https://www.wikidata.org/wiki/Q7380","display_name":"Primate","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C90132467","wikidata":"https://www.wikidata.org/wiki/Q171184","display_name":"Phylogenetics","level":3,"score":0.30329999327659607},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.30169999599456787},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C112789634","wikidata":"https://www.wikidata.org/wiki/Q18207010","display_name":"False positives and false negatives","level":3,"score":0.2678999900817871},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C2989486834","wikidata":"https://www.wikidata.org/wiki/Q3808900","display_name":"True positive rate","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.25929999351501465},{"id":"https://openalex.org/C88031987","wikidata":"https://www.wikidata.org/wiki/Q1377767","display_name":"Multiple sequence alignment","level":5,"score":0.25839999318122864},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.25769999623298645},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.25279998779296875}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000098412","descriptor_name":"Predictive Learning Models","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000098429","descriptor_name":"Classification Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000098437","descriptor_name":"Prediction Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008322","descriptor_name":"Mammals","qualifier_ui":"Q000821","qualifier_name":"virology","is_major_topic":true},{"descriptor_ui":"D008322","descriptor_name":"Mammals","qualifier_ui":"Q000821","qualifier_name":"virology","is_major_topic":true},{"descriptor_ui":"D008322","descriptor_name":"Mammals","qualifier_ui":"Q000821","qualifier_name":"virology","is_major_topic":true},{"descriptor_ui":"D008322","descriptor_name":"Mammals","qualifier_ui":"Q000821","qualifier_name":"virology","is_major_topic":true},{"descriptor_ui":"D010802","descriptor_name":"Phylogeny","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011323","descriptor_name":"Primates","qualifier_ui":"Q000821","qualifier_name":"virology","is_major_topic":false},{"descriptor_ui":"D011323","descriptor_name":"Primates","qualifier_ui":"Q000821","qualifier_name":"virology","is_major_topic":false},{"descriptor_ui":"D011323","descriptor_name":"Primates","qualifier_ui":"Q000821","qualifier_name":"virology","is_major_topic":false},{"descriptor_ui":"D014777","descriptor_name":"Virus Diseases","qualifier_ui":"Q000821","qualifier_name":"virology","is_major_topic":true},{"descriptor_ui":"D014780","descriptor_name":"Viruses","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":true},{"descriptor_ui":"D014780","descriptor_name":"Viruses","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":true},{"descriptor_ui":"D014780","descriptor_name":"Viruses","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":true},{"descriptor_ui":"D014780","descriptor_name":"Viruses","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D014780","descriptor_name":"Viruses","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D014780","descriptor_name":"Viruses","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D014780","descriptor_name":"Viruses","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D016679","descriptor_name":"Genome, Viral","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016679","descriptor_name":"Genome, Viral","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016679","descriptor_name":"Genome, Viral","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016679","descriptor_name":"Genome, Viral","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1371/journal.pcbi.1014125","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1014125","pdf_url":null,"source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},{"id":"pmid:41894437","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41894437","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS computational biology","raw_type":null}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.1014125","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1014125","pdf_url":null,"source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6269430474","display_name":null,"funder_award_id":"20230044D","funder_id":"https://openalex.org/F4320338304","funder_display_name":"Los Alamos National Laboratory"}],"funders":[{"id":"https://openalex.org/F4320338304","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2005160670","https://openalex.org/W2011301426","https://openalex.org/W2058412361","https://openalex.org/W2059157309","https://openalex.org/W2071467437","https://openalex.org/W2073519406","https://openalex.org/W2105656400","https://openalex.org/W2105698649","https://openalex.org/W2117539524","https://openalex.org/W2277953252","https://openalex.org/W2291003811","https://openalex.org/W2295598076","https://openalex.org/W2307213872","https://openalex.org/W2397429809","https://openalex.org/W2562319768","https://openalex.org/W2700382303","https://openalex.org/W2791206743","https://openalex.org/W2923014074","https://openalex.org/W2949676527","https://openalex.org/W2950509696","https://openalex.org/W2961729414","https://openalex.org/W2965629790","https://openalex.org/W3003257820","https://openalex.org/W3016074853","https://openalex.org/W3032757820","https://openalex.org/W3035965352","https://openalex.org/W3125614524","https://openalex.org/W3145674800","https://openalex.org/W3146944767","https://openalex.org/W3150635270","https://openalex.org/W3177828909","https://openalex.org/W3204961752","https://openalex.org/W4205773061","https://openalex.org/W4287844716","https://openalex.org/W4366602379","https://openalex.org/W4401455507","https://openalex.org/W4401951602","https://openalex.org/W4407927925","https://openalex.org/W4410587978"],"related_works":[],"abstract_inverted_index":{"There":[0],"have":[1,279],"been":[2],"several":[3,109],"attempts":[4],"to":[5,11,76,91,154,188,193,218,239,264,272,316,319,343],"develop":[6],"machine":[7,167,269],"learning":[8,168,270],"(ML)":[9],"models":[10,28,33,169,334,389],"identify":[12],"human":[13,151,212,274,399],"infecting":[14,62],"viruses":[15,173,355],"from":[16,68,174,214,237],"their":[17,175],"genomic":[18,176],"sequences,":[19],"with":[20,42,223,329,363],"varying":[21],"degrees":[22],"of":[23,60,72,86,147,156,165,209,211,245,268,285,321,332,348,398,402],"success.":[24],"Direct":[25],"comparison":[26],"between":[27,229,302,377],"is":[29,117,289,369,374],"problematic,":[30],"because":[31],"these":[32],"are":[34,327,405],"typically":[35],"trained":[36],"and":[37,48,63,94,100,123,145,231,262,304,325,382],"evaluated":[38],"on":[39],"different":[40],"datasets":[41],"alternative":[43],"data":[44,131,368],"splitting":[45],"schemes,":[46],"features,":[47],"model":[49,323],"performance":[50,116,164],"metrics.":[51],"In":[52],"this":[53,159,364],"paper":[54],"we":[55,161,278,326],"present":[56],"a":[57,139,359],"standardized":[58,266],"dataset":[59,187,261],"mammal":[61,246],"non-infecting":[64],"viral":[65,378],"pathogens,":[66],"refined":[67],"the":[69,78,84,92,124,163,194,204,224,299,330,352,367],"previous":[70,112],"work":[71],"Mollentze":[73,200],"et":[74,201],"al.":[75],"include":[77],"latest":[79],"literature":[80],"evidence,":[81],"roughly":[82],"doubling":[83],"number":[85],"curated":[87],"host-virus":[88],"records":[89],"available":[90],"community,":[93],"new":[95,103],"host":[96,104,243,275,287,337],"target":[97],"labels,":[98],"primate":[99,133],"mammal.":[101],"The":[102,241],"labels":[105],"were":[106],"included":[107,406],"for":[108,132,142,150,170,335],"reasons,":[110],"including":[111],"reports":[113],"that":[114,126,135,180,283,296,311,354,372],"classification":[115,284],"better":[118,392],"at":[119,253,292,396],"broader":[120],"taxonomic":[121,294],"ranks":[122],"idea":[125],"there":[127,373],"may":[128],"be":[129,249,317,341],"more":[130,290],"infection":[134,152,213,247,288,400],"might":[136],"serve":[137],"as":[138],"suitable":[140],"proxy":[141],"zoonotic":[143],"potential":[144],"avoidance":[146],"false":[148],"positives":[149],"due":[153],"absence":[155],"evidence.":[157],"On":[158],"dataset,":[160],"report":[162],"eight":[166],"predicting":[171],"mammal-infecting":[172],"sequences.":[177],"We":[178,257],"find":[179],"randomly":[181],"assigning":[182],"cases":[183],"in":[184,199,226,346,380],"our":[185,259],"improved":[186,260],"training/testing":[189,198],"sets,":[190],"when":[191,366],"compared":[192],"original":[195],"assignments":[196],"into":[197],"al.,":[202],"increases":[203],"overall":[205],"average":[206],"ROC":[207],"AUC":[208,408,415],"prediction":[210,338,397],"0.663":[215],"\u00b1":[216,220,255,410,417],"0.070":[217],"0.784":[219],"0.013,":[221],"consistent":[222],"reduction":[225],"phylogenetic":[227,300],"distance":[228,301],"train":[230],"test":[232,305,383],"sets":[233,306,384],"(relative":[234,385],"entropy":[235,386],"change":[236],"3.00":[238],"0.08).":[240],"broadest":[242],"category":[244],"can":[248,307,339],"predicted":[250],"most":[251],"reliably":[252],"0.850":[254],"0.020.":[256],"share":[258,358],"code":[263],"enable":[265],"comparisons":[267],"methods":[271],"predict":[273],"infections.":[276],"Overall,":[277],"presented":[280],"preliminary":[281],"evidence":[282],"virus":[286,336],"tractable":[291],"higher":[293],"ranks,":[295],"unsurprisingly":[297],"reducing":[298],"training":[303,381],"improve":[308],"predictive":[309],"performance,":[310,324],"peptide":[312],"kmer":[313],"features":[314],"appear":[315],"harmful":[318],"out":[320,347],"sample":[322,349],"left":[328],"question":[331],"whether":[333,403],"reasonably":[340],"expected":[342],"perform":[344,390],"well":[345],"scenarios":[350],"given":[351],"likelihood":[353],"do":[356],"not":[357,413],"common":[360],"ancestor.":[361],"Consistent":[362],"concern,":[365],"resampled":[370],"such":[371],"no":[375,391],"overlap":[376],"families":[379],">":[387],"24),":[388],"than":[393],"random":[394],"chance":[395],"regardless":[401],"kmers":[404],"(ROC":[407,414],"0.50":[409,416],"0.08)":[411],"or":[412],"0.04).":[418]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2026-03-28T00:00:00"}
