{"id":"https://openalex.org/W4417525644","doi":"https://doi.org/10.1186/s12859-025-06326-7","title":"Hierarchical clustering-based coarse-to-fine classification framework for microbial protein function prediction","display_name":"Hierarchical clustering-based coarse-to-fine classification framework for microbial protein function prediction","publication_year":2025,"publication_date":"2025-12-20","ids":{"openalex":"https://openalex.org/W4417525644","doi":"https://doi.org/10.1186/s12859-025-06326-7","pmid":"https://pubmed.ncbi.nlm.nih.gov/41421972"},"language":"en","primary_location":{"id":"doi:10.1186/s12859-025-06326-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06326-7","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1186/s12859-025-06326-7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101956655","display_name":"Shengyang Chen","orcid":"https://orcid.org/0000-0001-9961-8422"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengyang Chen","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, 100876, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, 100876, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005756945","display_name":"Xinyue Gao","orcid":"https://orcid.org/0000-0003-1081-2347"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyue Gao","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, 100876, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, 100876, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100443182","display_name":"Congmin Zhu","orcid":"https://orcid.org/0000-0002-0472-9128"},"institutions":[{"id":"https://openalex.org/I183519381","display_name":"Capital Medical University","ror":"https://ror.org/013xs5b60","country_code":"CN","type":"education","lineage":["https://openalex.org/I183519381"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Congmin Zhu","raw_affiliation_strings":["Beijing Key Laboratory of Fundamental Research on Biomechanics in Clinical Application, Capital Medical University, Beijing, 100069, China","School of Biomedical Engineering, Capital Medical University, Beijing, 100069, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Fundamental Research on Biomechanics in Clinical Application, Capital Medical University, Beijing, 100069, China","institution_ids":["https://openalex.org/I183519381"]},{"raw_affiliation_string":"School of Biomedical Engineering, Capital Medical University, Beijing, 100069, China","institution_ids":["https://openalex.org/I183519381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100760049","display_name":"Honglei Liu","orcid":"https://orcid.org/0000-0001-5518-4749"},"institutions":[{"id":"https://openalex.org/I183519381","display_name":"Capital Medical University","ror":"https://ror.org/013xs5b60","country_code":"CN","type":"education","lineage":["https://openalex.org/I183519381"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Honglei Liu","raw_affiliation_strings":["Beijing Key Laboratory of Fundamental Research on Biomechanics in Clinical Application, Capital Medical University, Beijing, 100069, China","School of Biomedical Engineering, Capital Medical University, Beijing, 100069, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Fundamental Research on Biomechanics in Clinical Application, Capital Medical University, Beijing, 100069, China","institution_ids":["https://openalex.org/I183519381"]},{"raw_affiliation_string":"School of Biomedical Engineering, Capital Medical University, Beijing, 100069, China","institution_ids":["https://openalex.org/I183519381"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056175399","display_name":"Yuqing Yang","orcid":"https://orcid.org/0000-0002-3536-167X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuqing Yang","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, 100876, China. yangyuqing@bupt.edu.cn"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, 100876, China. yangyuqing@bupt.edu.cn","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5056175399"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3194066,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"26","issue":"1","first_page":"301","last_page":"301"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.8629000186920166,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.8629000186920166,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.05460000038146973,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.019300000742077827,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5080999732017517},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.49779999256134033},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4374000132083893},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4083000123500824},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.39739999175071716},{"id":"https://openalex.org/keywords/protein-function-prediction","display_name":"Protein function prediction","score":0.3955000042915344},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.3926999866962433},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.37720000743865967},{"id":"https://openalex.org/keywords/gene-ontology","display_name":"Gene ontology","score":0.36890000104904175}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7379999756813049},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5390999913215637},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5080999732017517},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5076000094413757},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.49880000948905945},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.49779999256134033},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4374000132083893},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4083000123500824},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.39739999175071716},{"id":"https://openalex.org/C207060522","wikidata":"https://www.wikidata.org/wiki/Q7251473","display_name":"Protein function prediction","level":4,"score":0.3955000042915344},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C2987395477","wikidata":"https://www.wikidata.org/wiki/Q135085","display_name":"Gene ontology","level":4,"score":0.36890000104904175},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.3361999988555908},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.335099995136261},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.2897999882698059},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.28780001401901245},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C152662350","wikidata":"https://www.wikidata.org/wiki/Q815297","display_name":"Systems biology","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2815000116825104},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C20901353","wikidata":"https://www.wikidata.org/wiki/Q4117139","display_name":"Biological database","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s12859-025-06326-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06326-7","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:41421972","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41421972","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:5981d6d4a4b64a3e8b2e791771e412ae","is_oa":true,"landing_page_url":"https://doaj.org/article/5981d6d4a4b64a3e8b2e791771e412ae","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 26, Iss 1, Pp 1-16 (2025)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:12750710","is_oa":true,"landing_page_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12750710/","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s12859-025-06326-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06326-7","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6280306084","display_name":null,"funder_award_id":"(62203060, 62403492,82202299)","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2901218091","https://openalex.org/W2935703330","https://openalex.org/W2951433247","https://openalex.org/W2963351448","https://openalex.org/W2965778154","https://openalex.org/W3137270128","https://openalex.org/W3146944767","https://openalex.org/W3166265964","https://openalex.org/W3176196523","https://openalex.org/W3176676637","https://openalex.org/W3177500196","https://openalex.org/W4205773061","https://openalex.org/W4210547400","https://openalex.org/W4214673031","https://openalex.org/W4312577413","https://openalex.org/W4327550249","https://openalex.org/W4387125850","https://openalex.org/W4402253370"],"related_works":[],"abstract_inverted_index":{"Accurate":[0],"prediction":[1,22,78],"of":[2,32,79,195,221],"microbial":[3,10],"protein":[4,20,255],"functions":[5],"is":[6],"essential":[7],"for":[8],"understanding":[9],"physiology,":[11],"discovering":[12],"novel":[13],"probiotics,":[14],"and":[15,29,45,76,111,113,131,145,155,174,179,204,236,253,266],"driving":[16],"biotechnological":[17],"innovation.":[18],"However,":[19],"function":[21,256],"remains":[23],"challenging":[24],"due":[25],"to":[26,56,120,158,197,251],"the":[27,59,187,208],"hierarchical":[28,96,106,147,252],"class-imbalanced":[30],"nature":[31],"functional":[33,74,184,263],"labels,":[34,213],"particularly":[35],"in":[36,241],"large-scale":[37],"annotations":[38],"such":[39],"as":[40],"Enzyme":[41],"Commission":[42],"(EC)":[43],"numbers":[44],"Gene":[46],"Ontology":[47],"(GO)":[48],"terms.":[49],"Most":[50],"existing":[51],"deep":[52],"learning":[53],"approaches":[54],"fail":[55],"adequately":[57],"address":[58],"long-tail":[60,262],"distribution":[61],"problem.":[62],"We":[63],"propose":[64],"a":[65,88,103,141,146],"Hierarchical":[66],"Cascaded":[67],"Context":[68],"Network":[69],"(HCCN)":[70],"that":[71,91,164],"explicitly":[72],"models":[73],"hierarchies":[75],"emphasizes":[77],"low-frequency":[80,212],"(long-tail)":[81],"labels.":[82,97,264],"For":[83,98,211],"EC":[84],"classification,":[85],"we":[86,101,139],"design":[87],"coarse-to-fine":[89],"network":[90],"captures":[92],"parent\u2013child":[93],"dependencies":[94,123],"among":[95],"GO":[99],"prediction,":[100,257],"construct":[102],"semantically":[104],"grounded":[105],"structure":[107],"using":[108],"ontology":[109],"embedding":[110],"clustering,":[112],"develop":[114],"an":[115,248],"attention-based":[116],"multi-level":[117],"cascade":[118],"predictor":[119],"exploit":[121],"structured":[122],"across":[124,181],"Biological":[125],"Process":[126],"(BPO),":[127,201],"Molecular":[128],"Function":[129],"(MFO),":[130,203],"Cellular":[132],"Component":[133],"(CCO).":[134],"To":[135],"mitigate":[136],"label":[137],"imbalance,":[138],"introduce":[140],"dynamic":[142],"resampling":[143],"strategy":[144],"loss":[148],"weighting":[149],"mechanism,":[150],"which":[151],"enforce":[152],"inter-level":[153],"regularization":[154],"enhance":[156],"sensitivity":[157],"rare":[159],"functions.":[160],"Experimental":[161],"results":[162],"show":[163],"HCCN":[165,191,214,245],"consistently":[166],"outperforms":[167],"traditional":[168],"sequence-alignment":[169],"methods":[170],"(e.g.,":[171],"DIAMOND,":[172],"BLAST)":[173],"baseline":[175],"neural":[176],"networks":[177],"(MLP":[178],"DeepGOPlus)":[180],"all":[182],"major":[183],"categories.":[185],"On":[186],"full":[188],"test":[189],"set,":[190],"achieves":[192],"AUPR":[193],"gains":[194],"up":[196],"5.5%":[198],"(EC),":[199],"6.5%":[200],"4.9%":[202],"5.3%":[205],"(CCO)":[206],"over":[207],"best":[209],"baseline.":[210],"demonstrates":[215],"strong":[216],"few-shot":[217],"generalization,":[218],"with":[219],"improvements":[220],"+":[222,227,232,237],"11.2%":[223],"(EC":[224],"low)":[225,230,235,240],",":[226,231],"6.7%":[228],"(BPO":[229],"9.2%":[233],"(MFO":[234],"4.6%":[238],"(CCO":[239],"mAUPR.":[242],"The":[243],"proposed":[244],"framework":[246],"provides":[247],"effective":[249],"solution":[250],"imbalanced":[254],"significantly":[258],"improving":[259],"performance":[260],"on":[261],"Code":[265],"data":[267],"are":[268],"publicly":[269],"available":[270],"at:":[271],"https://github.com/YangLab-BUPT/HCCN":[272],".":[273]},"counts_by_year":[],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-12-20T00:00:00"}
