{"id":"https://openalex.org/W4360938460","doi":"https://doi.org/10.1093/bib/bbad117","title":"Fast and accurate protein function prediction from sequence through pretrained language model and homology-based label diffusion","display_name":"Fast and accurate protein function prediction from sequence through pretrained language model and homology-based label diffusion","publication_year":2023,"publication_date":"2023-03-24","ids":{"openalex":"https://openalex.org/W4360938460","doi":"https://doi.org/10.1093/bib/bbad117","pmid":"https://pubmed.ncbi.nlm.nih.gov/36964722"},"language":"en","primary_location":{"id":"doi:10.1093/bib/bbad117","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbad117","pdf_url":"https://academic.oup.com/bib/article-pdf/24/3/bbad117/50410866/bbad117.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://academic.oup.com/bib/article-pdf/24/3/bbad117/50410866/bbad117.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079232368","display_name":"Qianmu Yuan","orcid":"https://orcid.org/0000-0001-6098-9103"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qianmu Yuan","raw_affiliation_strings":["School of Computer Science and Engineering at Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering at Sun Yat-sen University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007708329","display_name":"Junjie Xie","orcid":"https://orcid.org/0000-0002-1620-4281"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junjie Xie","raw_affiliation_strings":["School of Computer Science and Engineering at Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering at Sun Yat-sen University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079072094","display_name":"Jiancong Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiancong Xie","raw_affiliation_strings":["School of Computer Science and Engineering at Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering at Sun Yat-sen University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029051152","display_name":"Huiying Zhao","orcid":"https://orcid.org/0000-0001-9134-536X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]},{"id":"https://openalex.org/I4210097354","display_name":"Sun Yat-sen Memorial Hospital","ror":"https://ror.org/01px77p81","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210097354"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Huiying Zhao","raw_affiliation_strings":["Sun Yat-sen Memorial Hospital at Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sun Yat-sen Memorial Hospital at Sun Yat-sen University","institution_ids":["https://openalex.org/I4210097354","https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023539493","display_name":"Yuedong Yang","orcid":"https://orcid.org/0000-0002-6782-2813"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yuedong Yang","raw_affiliation_strings":["School of Computer Science and Engineering at Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering at Sun Yat-sen University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5023539493","https://openalex.org/A5029051152"],"corresponding_institution_ids":["https://openalex.org/I157773358","https://openalex.org/I4210097354"],"apc_list":{"value":4011,"currency":"USD","value_usd":4011},"apc_paid":null,"fwci":14.6501,"has_fulltext":true,"cited_by_count":101,"citation_normalized_percentile":{"value":0.99361447,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"24","issue":"3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7235596179962158},{"id":"https://openalex.org/keywords/protein-function-prediction","display_name":"Protein function prediction","score":0.6152005195617676},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.509252667427063},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48533716797828674},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4651682376861572},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4599207639694214},{"id":"https://openalex.org/keywords/protein-function","display_name":"Protein function","score":0.4420573115348816},{"id":"https://openalex.org/keywords/sequence-motif","display_name":"Sequence motif","score":0.4181281328201294},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.4152362048625946},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40042513608932495},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.3794441819190979},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32891055941581726},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.16896098852157593},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.09793928265571594},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.08927452564239502},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08723914623260498}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7235596179962158},{"id":"https://openalex.org/C207060522","wikidata":"https://www.wikidata.org/wiki/Q7251473","display_name":"Protein function prediction","level":4,"score":0.6152005195617676},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.509252667427063},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48533716797828674},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4651682376861572},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4599207639694214},{"id":"https://openalex.org/C2986374874","wikidata":"https://www.wikidata.org/wiki/Q8054","display_name":"Protein function","level":3,"score":0.4420573115348816},{"id":"https://openalex.org/C117745874","wikidata":"https://www.wikidata.org/wiki/Q901612","display_name":"Sequence motif","level":3,"score":0.4181281328201294},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.4152362048625946},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40042513608932495},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3794441819190979},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32891055941581726},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.16896098852157593},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.09793928265571594},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.08927452564239502},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08723914623260498}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D063990","descriptor_name":"Gene Ontology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D063990","descriptor_name":"Gene Ontology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D063990","descriptor_name":"Gene Ontology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1093/bib/bbad117","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbad117","pdf_url":"https://academic.oup.com/bib/article-pdf/24/3/bbad117/50410866/bbad117.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},{"id":"pmid:36964722","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36964722","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in bioinformatics","raw_type":null}],"best_oa_location":{"id":"doi:10.1093/bib/bbad117","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbad117","pdf_url":"https://academic.oup.com/bib/article-pdf/24/3/bbad117/50410866/bbad117.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4642882069","display_name":null,"funder_award_id":"2022YFF1203100","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4877307238","display_name":null,"funder_award_id":"2022YFF1203100","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6346021793","display_name":null,"funder_award_id":"2019B020228001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8092729911","display_name":null,"funder_award_id":"12126610","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4360938460.pdf","grobid_xml":"https://content.openalex.org/works/W4360938460.grobid-xml"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W1966716734","https://openalex.org/W1976526581","https://openalex.org/W2045204781","https://openalex.org/W2048559110","https://openalex.org/W2076048958","https://openalex.org/W2095705004","https://openalex.org/W2096358531","https://openalex.org/W2101291993","https://openalex.org/W2103017472","https://openalex.org/W2108984616","https://openalex.org/W2117486996","https://openalex.org/W2151396228","https://openalex.org/W2156386646","https://openalex.org/W2158714788","https://openalex.org/W2164154943","https://openalex.org/W2227395312","https://openalex.org/W2472351724","https://openalex.org/W2522417827","https://openalex.org/W2615066396","https://openalex.org/W2624021832","https://openalex.org/W2770983033","https://openalex.org/W2807567459","https://openalex.org/W2949342052","https://openalex.org/W2951282333","https://openalex.org/W2951731136","https://openalex.org/W2989608901","https://openalex.org/W2999905431","https://openalex.org/W3098485826","https://openalex.org/W3107527779","https://openalex.org/W3112376646","https://openalex.org/W3116099552","https://openalex.org/W3137270128","https://openalex.org/W3146944767","https://openalex.org/W3164046276","https://openalex.org/W3165795318","https://openalex.org/W3179436811","https://openalex.org/W3197696221","https://openalex.org/W3211795435","https://openalex.org/W4200166788","https://openalex.org/W4205989901","https://openalex.org/W4220952154","https://openalex.org/W4220991280","https://openalex.org/W4225687252","https://openalex.org/W4236358448","https://openalex.org/W4287724045","https://openalex.org/W4288089799","https://openalex.org/W4289885706","https://openalex.org/W4295312788","https://openalex.org/W4307180312","https://openalex.org/W4311364193","https://openalex.org/W4312097792","https://openalex.org/W6674330103","https://openalex.org/W6766978945","https://openalex.org/W6769627184","https://openalex.org/W6780145172","https://openalex.org/W6780226713","https://openalex.org/W6840065848"],"related_works":["https://openalex.org/W3093454656","https://openalex.org/W1578069856","https://openalex.org/W124817461","https://openalex.org/W4308360413","https://openalex.org/W2952639490","https://openalex.org/W1530412035","https://openalex.org/W1985522665","https://openalex.org/W1987231763","https://openalex.org/W2101783948","https://openalex.org/W4360938460"],"abstract_inverted_index":{"Protein":[0],"function":[1,191],"prediction":[2,103],"is":[3,104,183,210],"an":[4],"essential":[5],"task":[6],"in":[7,25,146],"bioinformatics":[8],"which":[9,81],"benefits":[10],"disease":[11],"mechanism":[12,179],"elucidation":[13],"and":[14,28,39,93,112,135,144,170,197],"drug":[15],"target":[16],"discovery.":[17],"Due":[18],"to":[19,37,59,87,97,131,164,185],"the":[20,29,109,115,124,149,153,177],"explosive":[21],"growth":[22],"of":[23,31,118,200],"proteins":[24,119,169],"sequence":[26,91,187],"databases":[27],"diversity":[30],"their":[32],"functions,":[33],"it":[34],"remains":[35],"challenging":[36],"fast":[38],"accurately":[40],"predict":[41],"protein":[42,52],"functions":[43,122],"from":[44],"sequences":[45],"alone.":[46],"Although":[47],"many":[48],"methods":[49],"have":[50],"integrated":[51],"structures,":[53],"biological":[54],"networks":[55],"or":[56],"literature":[57],"information":[58,111],"improve":[60],"performance,":[61],"these":[62],"extra":[63],"features":[64],"are":[65,202],"often":[66],"unavailable":[67],"for":[68,114,190],"most":[69],"proteins.":[70],"Here,":[71],"we":[72],"propose":[73],"SPROF-GO,":[74],"a":[75,83],"Sequence-based":[76],"alignment-free":[77],"PROtein":[78],"Function":[79],"predictor,":[80],"leverages":[82],"pretrained":[84],"language":[85],"model":[86],"efficiently":[88],"extract":[89],"informative":[90],"embeddings":[92],"employs":[94],"self-attention":[95],"pooling":[96],"focus":[98],"on":[99,152,167,176],"important":[100],"residues.":[101],"The":[102,193,206],"further":[105],"advanced":[106],"by":[107,139],"exploiting":[108],"homology":[110],"accounting":[113],"overlapping":[116],"communities":[117],"with":[120],"related":[121],"through":[123],"label":[125],"diffusion":[126],"algorithm.":[127],"SPROF-GO":[128,182,201,207],"was":[129,161],"shown":[130],"surpass":[132],"state-of-the-art":[133],"sequence-based":[134],"even":[136],"network-based":[137],"approaches":[138],"more":[140],"than":[141],"14.5,":[142],"27.3":[143],"10.1%":[145],"area":[147],"under":[148],"precision-recall":[150],"curve":[151],"three":[154],"sub-ontology":[155],"test":[156],"sets,":[157],"respectively.":[158],"Our":[159],"method":[160],"also":[162],"demonstrated":[163],"generalize":[165],"well":[166],"non-homologous":[168],"unseen":[171],"species.":[172],"Finally,":[173],"visualization":[174],"based":[175],"attention":[178],"indicated":[180],"that":[181],"able":[184],"capture":[186],"domains":[188],"useful":[189],"prediction.":[192],"datasets,":[194],"source":[195],"codes":[196],"trained":[198],"models":[199],"available":[203,212],"at":[204,213],"https://github.com/biomed-AI/SPROF-GO.":[205],"web":[208],"server":[209],"freely":[211],"http://bio-web1.nscc-gz.cn/app/sprof-go.":[214]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":42},{"year":2024,"cited_by_count":41},{"year":2023,"cited_by_count":10}],"updated_date":"2026-05-26T13:28:51.108037","created_date":"2025-10-10T00:00:00"}
