{"id":"https://openalex.org/W7151815813","doi":"https://doi.org/10.48550/arxiv.2604.05775","title":"PhageBench: Can LLMs Understand Raw Bacteriophage Genomes?","display_name":"PhageBench: Can LLMs Understand Raw Bacteriophage Genomes?","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7151815813","doi":"https://doi.org/10.48550/arxiv.2604.05775"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.05775","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.05775","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133231090","display_name":"Yusen Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hou, Yusen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034040709","display_name":"Weicai Long","orcid":"https://orcid.org/0009-0009-3205-9537"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Long, Weicai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133183963","display_name":"Haitao Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Haitao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133154732","display_name":"Houcheng Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Houcheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133215763","display_name":"Junning Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Junning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133236066","display_name":"Yanlin Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yanlin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5133231090"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.2484000027179718,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.2484000027179718,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11048","display_name":"Bacteriophages and microbial interactions","score":0.20829999446868896,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12576","display_name":"vaccines and immunoinformatics approaches","score":0.07699999958276749,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6628999710083008},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.491100013256073},{"id":"https://openalex.org/keywords/mirroring","display_name":"Mirroring","score":0.446399986743927},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4246000051498413},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.42170000076293945},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.41339999437332153},{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.3776000142097473},{"id":"https://openalex.org/keywords/bacteriophage","display_name":"Bacteriophage","score":0.3400000035762787}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6628999710083008},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5246000289916992},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5095999836921692},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.491100013256073},{"id":"https://openalex.org/C189645446","wikidata":"https://www.wikidata.org/wiki/Q350865","display_name":"Mirroring","level":2,"score":0.446399986743927},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4246000051498413},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.42170000076293945},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.41339999437332153},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.412200003862381},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.39989998936653137},{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.3776000142097473},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3450999855995178},{"id":"https://openalex.org/C2776441376","wikidata":"https://www.wikidata.org/wiki/Q165028","display_name":"Bacteriophage","level":4,"score":0.3400000035762787},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.32499998807907104},{"id":"https://openalex.org/C2780902518","wikidata":"https://www.wikidata.org/wiki/Q6033780","display_name":"Inheritance (genetic algorithm)","level":3,"score":0.31679999828338623},{"id":"https://openalex.org/C110332635","wikidata":"https://www.wikidata.org/wiki/Q629498","display_name":"Genetic programming","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C59582021","wikidata":"https://www.wikidata.org/wiki/Q1128751","display_name":"Contig","level":4,"score":0.3012000024318695},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C33009525","wikidata":"https://www.wikidata.org/wiki/Q208841","display_name":"Coevolution","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.2515999972820282},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.05775","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.05775","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05775","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Bacteriophages,":[0],"often":[1],"referred":[2],"to":[3,48,70],"as":[4],"the":[5,9,66,77,147],"dark":[6],"matter":[7],"of":[8,26,79,103,149],"biosphere,":[10],"play":[11],"a":[12],"critical":[13],"role":[14],"in":[15,20,115,133],"regulating":[16],"microbial":[17],"ecosystems":[18],"and":[19,32,54,98,119,140],"antibiotic":[21],"alternatives.":[22],"Thus,":[23],"accurate":[24],"interpretation":[25],"their":[27,46],"genomes":[28],"holds":[29],"significant":[30,131],"scientific":[31],"practical":[33],"value.":[34],"While":[35],"general-purpose":[36,108],"Large":[37],"Language":[38],"Models":[39],"(LLMs)":[40],"excel":[41],"at":[42],"understanding":[43,74],"biological":[44,56,158],"texts,":[45],"ability":[47],"directly":[49],"interpret":[50],"raw":[51],"nucleotide":[52],"sequences":[53],"perform":[55],"reasoning":[57,109,135,155],"remains":[58],"underexplored.":[59],"To":[60],"address":[61],"this,":[62],"we":[63],"introduce":[64],"PhageBench,":[65],"first":[67],"benchmark":[68],"designed":[69],"evaluate":[71],"phage":[72,116],"genome":[73],"by":[75],"mirroring":[76],"workflow":[78],"bioinformatics":[80],"experts.":[81],"The":[82],"dataset":[83],"contains":[84],"5,600":[85],"high-quality":[86],"samples":[87],"covering":[88],"five":[89],"core":[90],"tasks":[91,136],"across":[92],"three":[93],"stages:":[94],"Screening,":[95],"Quality":[96],"Control,":[97],"Phenotype":[99],"Annotation.":[100],"Our":[101],"evaluation":[102],"eight":[104],"LLMs":[105],"reveals":[106],"that":[107],"models":[110,152],"significantly":[111],"outperform":[112],"random":[113],"baselines":[114],"contig":[117],"identification":[118],"host":[120],"prediction,":[121],"demonstrating":[122],"promising":[123],"potential":[124],"for":[125,157],"genomic":[126],"understanding.":[127],"However,":[128],"they":[129],"exhibit":[130],"limitations":[132],"complex":[134],"involving":[137],"long-range":[138],"dependencies":[139],"fine-grained":[141],"functional":[142],"localization.":[143],"These":[144],"findings":[145],"highlight":[146],"necessity":[148],"developing":[150],"next-generation":[151],"with":[153],"enhanced":[154],"capabilities":[156],"sequences.":[159]},"counts_by_year":[],"updated_date":"2026-04-09T06:13:59.934233","created_date":"2026-04-09T00:00:00"}
