{"id":"https://openalex.org/W4221138532","doi":"https://doi.org/10.1093/bib/bbac258","title":"Accurate identification of bacteriophages from metagenomic data using Transformer","display_name":"Accurate identification of bacteriophages from metagenomic data using Transformer","publication_year":2022,"publication_date":"2022-06-30","ids":{"openalex":"https://openalex.org/W4221138532","doi":"https://doi.org/10.1093/bib/bbac258","pmid":"https://pubmed.ncbi.nlm.nih.gov/35769000"},"language":"en","primary_location":{"id":"doi:10.1093/bib/bbac258","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbac258","pdf_url":"https://academic.oup.com/bib/article-pdf/23/4/bbac258/45017900/bbac258.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://academic.oup.com/bib/article-pdf/23/4/bbac258/45017900/bbac258.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083607843","display_name":"Jiayu Shang","orcid":"https://orcid.org/0000-0001-5974-4985"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jiayu Shang","raw_affiliation_strings":["Department of Electrical Engineering, City University of Hong Kong , Hong Kong (SAR) , China"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, City University of Hong Kong , Hong Kong (SAR) , China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076779212","display_name":"Xubo Tang","orcid":"https://orcid.org/0000-0003-1304-6983"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xubo Tang","raw_affiliation_strings":["Department of Electrical Engineering, City University of Hong Kong , Hong Kong (SAR) , China"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, City University of Hong Kong , Hong Kong (SAR) , China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054719216","display_name":"Ruocheng Guo","orcid":"https://orcid.org/0000-0002-8522-6142"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ruocheng Guo","raw_affiliation_strings":["School of Data Science, City University of Hong Kong , Hong Kong (SAR) , China"],"affiliations":[{"raw_affiliation_string":"School of Data Science, City University of Hong Kong , Hong Kong (SAR) , China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081855118","display_name":"Yanni Sun","orcid":"https://orcid.org/0000-0003-1373-8023"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Yanni Sun","raw_affiliation_strings":["Department of Electrical Engineering, City University of Hong Kong , Hong Kong (SAR) , China"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, City University of Hong Kong , Hong Kong (SAR) , China","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5081855118"],"corresponding_institution_ids":["https://openalex.org/I168719708"],"apc_list":{"value":4011,"currency":"USD","value_usd":4011},"apc_paid":{"value":4011,"currency":"USD","value_usd":4011},"fwci":9.5067,"has_fulltext":true,"cited_by_count":56,"citation_normalized_percentile":{"value":0.98571863,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"23","issue":"4","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11048","display_name":"Bacteriophages and microbial interactions","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11048","display_name":"Bacteriophages and microbial interactions","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11791","display_name":"Microbial Community Ecology and Physiology","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.9273635149002075},{"id":"https://openalex.org/keywords/contig","display_name":"Contig","score":0.7952896356582642},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.645893931388855},{"id":"https://openalex.org/keywords/refseq","display_name":"RefSeq","score":0.569123387336731},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.5476176142692566},{"id":"https://openalex.org/keywords/replicate","display_name":"Replicate","score":0.5476070046424866},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.5200604796409607},{"id":"https://openalex.org/keywords/microbiome","display_name":"Microbiome","score":0.4461105763912201},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.42752906680107117},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.4248031973838806},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.27595919370651245},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.2443489134311676}],"concepts":[{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.9273635149002075},{"id":"https://openalex.org/C59582021","wikidata":"https://www.wikidata.org/wiki/Q1128751","display_name":"Contig","level":4,"score":0.7952896356582642},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.645893931388855},{"id":"https://openalex.org/C151810110","wikidata":"https://www.wikidata.org/wiki/Q7307074","display_name":"RefSeq","level":4,"score":0.569123387336731},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.5476176142692566},{"id":"https://openalex.org/C2781162219","wikidata":"https://www.wikidata.org/wiki/Q26250693","display_name":"Replicate","level":2,"score":0.5476070046424866},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.5200604796409607},{"id":"https://openalex.org/C143121216","wikidata":"https://www.wikidata.org/wiki/Q1330402","display_name":"Microbiome","level":2,"score":0.4461105763912201},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.42752906680107117},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.4248031973838806},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.27595919370651245},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.2443489134311676},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D001419","descriptor_name":"Bacteria","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D001419","descriptor_name":"Bacteria","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D001419","descriptor_name":"Bacteria","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D001435","descriptor_name":"Bacteriophages","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D001435","descriptor_name":"Bacteriophages","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D001435","descriptor_name":"Bacteriophages","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D054892","descriptor_name":"Metagenome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D054892","descriptor_name":"Metagenome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D054892","descriptor_name":"Metagenome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D064307","descriptor_name":"Microbiota","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D064307","descriptor_name":"Microbiota","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D064307","descriptor_name":"Microbiota","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":5,"locations":[{"id":"doi:10.1093/bib/bbac258","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbac258","pdf_url":"https://academic.oup.com/bib/article-pdf/23/4/bbac258/45017900/bbac258.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},{"id":"pmid:35769000","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35769000","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in bioinformatics","raw_type":null},{"id":"pmh:oai:arXiv.org:2201.04778","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2201.04778","pdf_url":"https://arxiv.org/pdf/2201.04778","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:pure.atira.dk:publications/35993a8d-f4a4-4172-a39a-259a9aba060c","is_oa":true,"landing_page_url":"https://hdl.handle.net/2031/35993a8d-f4a4-4172-a39a-259a9aba060c","pdf_url":"https://scholars.cityu.edu.hk/files/110089948/108407986.pdf","source":{"id":"https://openalex.org/S7407055387","display_name":"CityU Scholars","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Shang, J, Tang, X, Guo, R & Sun, Y 2022, 'Accurate identification of bacteriophages from metagenomic data using Transformer', Briefings in Bioinformatics, vol. 23, no. 4, bbac258. https://doi.org/10.1093/bib/bbac258","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:9294416","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/9294416","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Brief Bioinform","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1093/bib/bbac258","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbac258","pdf_url":"https://academic.oup.com/bib/article-pdf/23/4/bbac258/45017900/bbac258.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1859322275","display_name":null,"funder_award_id":"InnoHK","funder_id":"https://openalex.org/F4320321920","funder_display_name":"Innovation and Technology Commission"},{"id":"https://openalex.org/G2642563814","display_name":null,"funder_award_id":"CIMDA","funder_id":"https://openalex.org/F4320321920","funder_display_name":"Innovation and Technology Commission"},{"id":"https://openalex.org/G3800229870","display_name":null,"funder_award_id":"7005453","funder_id":"https://openalex.org/F4320309893","funder_display_name":"City University of Hong Kong"},{"id":"https://openalex.org/G711425152","display_name":null,"funder_award_id":"9678241","funder_id":"https://openalex.org/F4320309893","funder_display_name":"City University of Hong Kong"}],"funders":[{"id":"https://openalex.org/F4320309893","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23"},{"id":"https://openalex.org/F4320321920","display_name":"Innovation and Technology Commission","ror":"https://ror.org/04vf9tr09"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4221138532.pdf","grobid_xml":"https://content.openalex.org/works/W4221138532.grobid-xml"},"referenced_works_count":65,"referenced_works":["https://openalex.org/W226855855","https://openalex.org/W1949003960","https://openalex.org/W2036897871","https://openalex.org/W2045204781","https://openalex.org/W2090817404","https://openalex.org/W2119859604","https://openalex.org/W2124166542","https://openalex.org/W2143485490","https://openalex.org/W2158406706","https://openalex.org/W2175526948","https://openalex.org/W2194775991","https://openalex.org/W2245444176","https://openalex.org/W2526038422","https://openalex.org/W2593449349","https://openalex.org/W2599417231","https://openalex.org/W2604272474","https://openalex.org/W2610784236","https://openalex.org/W2612690371","https://openalex.org/W2732139758","https://openalex.org/W2747329762","https://openalex.org/W2782623083","https://openalex.org/W2803143214","https://openalex.org/W2896457183","https://openalex.org/W2904213287","https://openalex.org/W2940459311","https://openalex.org/W2942757983","https://openalex.org/W2944870141","https://openalex.org/W2949831026","https://openalex.org/W2950438666","https://openalex.org/W2950784811","https://openalex.org/W2952204926","https://openalex.org/W2961387603","https://openalex.org/W2962958286","https://openalex.org/W2962975498","https://openalex.org/W2973579020","https://openalex.org/W2992400060","https://openalex.org/W3003110834","https://openalex.org/W3014332420","https://openalex.org/W3028145155","https://openalex.org/W3032191947","https://openalex.org/W3091917690","https://openalex.org/W3091956242","https://openalex.org/W3097008984","https://openalex.org/W3107997545","https://openalex.org/W3118981004","https://openalex.org/W3127656915","https://openalex.org/W3136918052","https://openalex.org/W3184005698","https://openalex.org/W3195881367","https://openalex.org/W3215755691","https://openalex.org/W4200441850","https://openalex.org/W4200505767","https://openalex.org/W4214943162","https://openalex.org/W4225255896","https://openalex.org/W4234552385","https://openalex.org/W4236763430","https://openalex.org/W4246460447","https://openalex.org/W4285788416","https://openalex.org/W4294170691","https://openalex.org/W4295838474","https://openalex.org/W4319324331","https://openalex.org/W4385245566","https://openalex.org/W4394666973","https://openalex.org/W6755207826","https://openalex.org/W6780226713"],"related_works":["https://openalex.org/W2330186985","https://openalex.org/W2950447380","https://openalex.org/W2055448750","https://openalex.org/W2345730070","https://openalex.org/W2107854630","https://openalex.org/W4319294554","https://openalex.org/W3134563641","https://openalex.org/W1986608492","https://openalex.org/W4394186879","https://openalex.org/W3003110834"],"abstract_inverted_index":{"In":[0,101],"this":[1],"work,":[2],"we":[3,22],"adopt":[4],"the":[5,26,30,37,43,49,54,86,91,98,102,109],"state-of-the-art":[6,99],"language":[7],"model,":[8],"Transformer,":[9],"to":[10],"conduct":[11],"contextual":[12],"embedding":[13],"for":[14,56],"phage":[15,112],"contigs.":[16,58],"By":[17],"constructing":[18],"a":[19],"protein-cluster":[20],"vocabulary,":[21],"can":[23,41],"feed":[24],"both":[25],"protein":[27,44],"composition":[28],"and":[29,46,52,85],"proteins'":[31],"positions":[32],"from":[33],"each":[34],"contig":[35],"into":[36],"Transformer.":[38],"The":[39],"Transformer":[40],"learn":[42],"organization":[45],"associations":[47],"using":[48],"self-attention":[50],"mechanism":[51],"predicts":[53],"label":[55],"test":[57],"We":[59],"rigorously":[60],"tested":[61],"our":[62],"developed":[63],"tool":[64],"named":[65],"PhaMer":[66,96,107],"on":[67],"multiple":[68],"datasets":[69],"with":[70],"increasing":[71],"difficulty,":[72],"including":[73],"quality":[74],"RefSeq":[75],"genomes,":[76],"short":[77],"contigs,":[78],"simulated":[79],"metagenomic":[80,83,104],"data,":[81],"mock":[82],"data":[84,105],"public":[87],"IMG/VR":[88],"dataset.":[89],"All":[90],"experimental":[92],"results":[93],"show":[94],"that":[95],"outperforms":[97],"tools.":[100],"real":[103],"experiment,":[106],"improves":[108],"F1-score":[110],"of":[111],"detection":[113],"by":[114],"27%.":[115]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
