{"id":"https://openalex.org/W4412876944","doi":"https://doi.org/10.1145/3711896.3737138","title":"Structure-Enhanced Protein Instruction Tuning: Towards General-Purpose Protein Understanding with LLMs","display_name":"Structure-Enhanced Protein Instruction Tuning: Towards General-Purpose Protein Understanding with LLMs","publication_year":2025,"publication_date":"2025-08-03","ids":{"openalex":"https://openalex.org/W4412876944","doi":"https://doi.org/10.1145/3711896.3737138"},"language":"en","primary_location":{"id":"doi:10.1145/3711896.3737138","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737138","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737138","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737138","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103326211","display_name":"Wei Wu","orcid":"https://orcid.org/0009-0009-1590-601X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Wu","raw_affiliation_strings":["School of Artificial Intelligence and Data Science, University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0009-1590-601X","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Data Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100406893","display_name":"Chao Wang","orcid":"https://orcid.org/0000-0001-7717-447X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Wang","raw_affiliation_strings":["School of Artificial Intelligence and Data Science, University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0001-7717-447X","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Data Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056428506","display_name":"Liyi Chen","orcid":"https://orcid.org/0000-0003-2166-4386"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liyi Chen","raw_affiliation_strings":["School of Artificial Intelligence and Data Science, University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0003-2166-4386","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Data Science, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114452818","display_name":"Mingze Yin","orcid":"https://orcid.org/0009-0009-6595-9849"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingze Yin","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0009-6595-9849","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013049479","display_name":"Yiheng Zhu","orcid":"https://orcid.org/0000-0001-8020-9979"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiheng Zhu","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-8020-9979","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101956475","display_name":"Kun Fu","orcid":"https://orcid.org/0000-0002-2305-1017"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Fu","raw_affiliation_strings":["Alibaba Cloud Computing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2305-1017","affiliations":[{"raw_affiliation_string":"Alibaba Cloud Computing, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010419481","display_name":"Jieping Ye","orcid":"https://orcid.org/0000-0001-8662-5818"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jieping Ye","raw_affiliation_strings":["Alibaba Cloud Computing, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-8662-5818","affiliations":[{"raw_affiliation_string":"Alibaba Cloud Computing, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101862104","display_name":"Hui Xiong","orcid":"https://orcid.org/0000-0001-6016-6465"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Hui Xiong","raw_affiliation_strings":["Thrust of Artificial Intelligence, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China and Department of Computer Science and Engineering, The Hong Kong University of Science and Technology, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0001-6016-6465","affiliations":[{"raw_affiliation_string":"Thrust of Artificial Intelligence, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China and Department of Computer Science and Engineering, The Hong Kong University of Science and Technology, Hong Kong SAR, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101085493","display_name":"Zheng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Wang","raw_affiliation_strings":["Alibaba Cloud Computing, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-4271-6206","affiliations":[{"raw_affiliation_string":"Alibaba Cloud Computing, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5103326211"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.7114,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.72542172,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3216","last_page":"3227"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4856487512588501}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4856487512588501}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3711896.3737138","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737138","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737138","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3711896.3737138","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737138","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737138","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1782397203","display_name":null,"funder_award_id":"92370204","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6130982387","display_name":null,"funder_award_id":"2023B1515120057","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412876944.pdf","grobid_xml":"https://content.openalex.org/works/W4412876944.grobid-xml"},"referenced_works_count":52,"referenced_works":["https://openalex.org/W93532817","https://openalex.org/W2029661271","https://openalex.org/W2055522016","https://openalex.org/W2061833373","https://openalex.org/W2064675550","https://openalex.org/W2100506586","https://openalex.org/W2102461176","https://openalex.org/W2170747616","https://openalex.org/W2591359289","https://openalex.org/W2600463316","https://openalex.org/W2787473680","https://openalex.org/W2896457183","https://openalex.org/W2907492528","https://openalex.org/W2911109671","https://openalex.org/W2944851425","https://openalex.org/W2949342052","https://openalex.org/W2964110616","https://openalex.org/W2980789587","https://openalex.org/W2995514860","https://openalex.org/W2999481648","https://openalex.org/W3046375318","https://openalex.org/W3135367836","https://openalex.org/W3146944767","https://openalex.org/W3166396011","https://openalex.org/W3177500196","https://openalex.org/W3177828909","https://openalex.org/W3211795435","https://openalex.org/W4205773061","https://openalex.org/W4223581484","https://openalex.org/W4318751307","https://openalex.org/W4323304388","https://openalex.org/W4323572061","https://openalex.org/W4327550249","https://openalex.org/W4378363567","https://openalex.org/W4378373365","https://openalex.org/W4381713982","https://openalex.org/W4387432256","https://openalex.org/W4393159659","https://openalex.org/W4400231442","https://openalex.org/W4401863429","https://openalex.org/W4402671941","https://openalex.org/W4409670839","https://openalex.org/W6600007113","https://openalex.org/W6600339963","https://openalex.org/W6600540558","https://openalex.org/W6600553734","https://openalex.org/W6600669965","https://openalex.org/W6601365666","https://openalex.org/W6814250579","https://openalex.org/W6820618299","https://openalex.org/W6850503672","https://openalex.org/W6851092083"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Proteins,":[0],"as":[1],"essential":[2],"biomolecules,":[3],"play":[4],"a":[5,41,64,84,115,140,152],"central":[6],"role":[7],"in":[8,26],"biological":[9,27],"processes,":[10],"including":[11],"metabolic":[12],"reactions":[13],"and":[14,22,58,95,130,162,177,190,203,218],"DNA":[15],"replication.":[16],"Accurate":[17],"prediction":[18,56],"of":[19,31,143,154,169,211],"their":[20,92],"properties":[21,161],"functions":[23],"is":[24,51],"crucial":[25],"applications.":[28],"Recent":[29],"development":[30],"protein":[32,61,108,180,194,223],"language":[33,103],"models":[34,104],"(pLMs)":[35],"with":[36,101,165,222],"supervised":[37],"fine":[38],"tuning":[39,118],"provides":[40],"promising":[42],"solution":[43],"to":[44,77,90,106,138,157,183,188],"this":[45,67,79,111,148],"problem.":[46],"However,":[47],"the":[48,124,166,175,192,208],"fine-tuned":[49],"model":[50],"tailored":[52],"for":[53],"particular":[54],"downstream":[55],"task,":[57],"achieving":[59],"general-purpose":[60,193],"understanding":[62,142,149,195],"remains":[63],"challenge.":[65],"In":[66,110],"paper,":[68],"we":[69,113,121,146,173],"introduce":[70],"Structure-Enhanced":[71],"Protein":[72],"Instruction":[73],"Tuning":[74],"(SEPIT)":[75],"framework":[76],"bridge":[78],"gap.":[80],"Our":[81],"approach":[82],"incorporates":[83],"novel":[85,116],"structure-aware":[86],"module":[87],"into":[88],"pLMs":[89,100,126],"enrich":[91],"structural":[93],"knowledge,":[94],"subsequently":[96],"integrates":[97],"these":[98],"enhanced":[99,125],"large":[102],"(LLMs)":[105],"advance":[107],"understanding.":[109],"framework,":[112],"propose":[114],"instruction":[117,181],"pipeline.":[119],"First,":[120],"warm":[122],"up":[123],"using":[127],"contrastive":[128],"learning":[129],"structure":[131],"denoising.":[132],"Then,":[133],"caption-based":[134],"instructions":[135],"are":[136],"used":[137],"establish":[139],"basic":[141],"proteins.":[144],"Finally,":[145],"refine":[147],"by":[150],"employing":[151],"mixture":[153],"experts":[155],"(MoEs)":[156],"capture":[158],"more":[159],"complex":[160],"functional":[163],"information":[164],"same":[167],"number":[168],"activated":[170],"parameters.":[171],"Moreover,":[172],"construct":[174],"largest":[176],"most":[178],"comprehensive":[179],"dataset":[182],"date,":[184],"which":[185],"allows":[186],"us":[187],"train":[189],"evaluate":[191],"model.":[196],"Extensive":[197],"experiments":[198],"on":[199],"both":[200,214],"open-ended":[201],"generation":[202],"closed-set":[204],"answer":[205],"tasks":[206],"demonstrate":[207],"superior":[209],"performance":[210],"SEPIT":[212],"over":[213],"closed-source":[215],"general":[216],"LLMs":[217,220],"open-source":[219],"trained":[221],"knowledge.":[224]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
