{"id":"https://openalex.org/W4417062420","doi":"https://doi.org/10.1145/3768322.3769033","title":"Systematic evaluation of 566 sequence-based features for predicting protein stability changes induced by mutations using machine learning","display_name":"Systematic evaluation of 566 sequence-based features for predicting protein stability changes induced by mutations using machine learning","publication_year":2025,"publication_date":"2025-10-11","ids":{"openalex":"https://openalex.org/W4417062420","doi":"https://doi.org/10.1145/3768322.3769033"},"language":null,"primary_location":{"id":"doi:10.1145/3768322.3769033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3768322.3769033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3768322.3769033","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3768322.3769033","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101299199","display_name":"Qiaobin Yao","orcid":"https://orcid.org/0009-0009-3832-1524"},"institutions":[{"id":"https://openalex.org/I137853757","display_name":"Howard University","ror":"https://ror.org/05gt1vc06","country_code":"US","type":"education","lineage":["https://openalex.org/I137853757"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Qiaobin Yao","raw_affiliation_strings":["Biology, Howard University, Washington, DC, USA"],"raw_orcid":"https://orcid.org/0009-0009-3832-1524","affiliations":[{"raw_affiliation_string":"Biology, Howard University, Washington, DC, USA","institution_ids":["https://openalex.org/I137853757"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Junyan Li","orcid":"https://orcid.org/0000-0002-1392-5796"},"institutions":[{"id":"https://openalex.org/I137853757","display_name":"Howard University","ror":"https://ror.org/05gt1vc06","country_code":"US","type":"education","lineage":["https://openalex.org/I137853757"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junyan Li","raw_affiliation_strings":["Biology, Howard University, Washington, DC, USA"],"raw_orcid":"https://orcid.org/0000-0002-1392-5796","affiliations":[{"raw_affiliation_string":"Biology, Howard University, Washington, DC, USA","institution_ids":["https://openalex.org/I137853757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101516631","display_name":"Dongxiao Liu","orcid":"https://orcid.org/0000-0001-9656-8976"},"institutions":[{"id":"https://openalex.org/I137853757","display_name":"Howard University","ror":"https://ror.org/05gt1vc06","country_code":"US","type":"education","lineage":["https://openalex.org/I137853757"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dongxiao Liu","raw_affiliation_strings":["Biology, Howard University, Washington, DC, USA"],"raw_orcid":"https://orcid.org/0000-0001-9656-8976","affiliations":[{"raw_affiliation_string":"Biology, Howard University, Washington, DC, USA","institution_ids":["https://openalex.org/I137853757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120691444","display_name":"Krish Wahi","orcid":"https://orcid.org/0009-0003-6486-1387"},"institutions":[{"id":"https://openalex.org/I135191193","display_name":"University of Indianapolis","ror":"https://ror.org/052133d12","country_code":"US","type":"education","lineage":["https://openalex.org/I135191193"]},{"id":"https://openalex.org/I55769427","display_name":"Indiana University \u2013 Purdue University Indianapolis","ror":"https://ror.org/05gxnyn08","country_code":"US","type":"education","lineage":["https://openalex.org/I55769427","https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Krish Wahi","raw_affiliation_strings":["School of Science, Indiana University Indianapolis, Indianapolis, IN, USA"],"raw_orcid":"https://orcid.org/0009-0003-6486-1387","affiliations":[{"raw_affiliation_string":"School of Science, Indiana University Indianapolis, Indianapolis, IN, USA","institution_ids":["https://openalex.org/I55769427","https://openalex.org/I135191193"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081951768","display_name":"Shaolei Teng","orcid":"https://orcid.org/0000-0001-8326-9889"},"institutions":[{"id":"https://openalex.org/I137853757","display_name":"Howard University","ror":"https://ror.org/05gt1vc06","country_code":"US","type":"education","lineage":["https://openalex.org/I137853757"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shaolei Teng","raw_affiliation_strings":["Biology, Howard University, Washington, DC, USA"],"raw_orcid":"https://orcid.org/0000-0001-8326-9889","affiliations":[{"raw_affiliation_string":"Biology, Howard University, Washington, DC, USA","institution_ids":["https://openalex.org/I137853757"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101299199"],"corresponding_institution_ids":["https://openalex.org/I137853757"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33108939,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.8108000159263611,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.8108000159263611,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.07029999792575836,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.029600000008940697,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.9398000240325928},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.6690000295639038},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5630000233650208},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5526999831199646},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.5019999742507935},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4196999967098236},{"id":"https://openalex.org/keywords/predictive-power","display_name":"Predictive power","score":0.40119999647140503},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.40070000290870667},{"id":"https://openalex.org/keywords/sliding-window-protocol","display_name":"Sliding window protocol","score":0.39259999990463257}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.9398000240325928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.8104000091552734},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7437000274658203},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.6690000295639038},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5630000233650208},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5526999831199646},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5485000014305115},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.5019999742507935},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4196999967098236},{"id":"https://openalex.org/C2778136018","wikidata":"https://www.wikidata.org/wiki/Q10350689","display_name":"Predictive power","level":2,"score":0.40119999647140503},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.40070000290870667},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.39259999990463257},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3668999969959259},{"id":"https://openalex.org/C2988375501","wikidata":"https://www.wikidata.org/wiki/Q847556","display_name":"Protein stability","level":2,"score":0.34929999709129333},{"id":"https://openalex.org/C11804247","wikidata":"https://www.wikidata.org/wiki/Q896177","display_name":"Protein\u2013protein interaction","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C58471807","wikidata":"https://www.wikidata.org/wiki/Q327120","display_name":"Receiver operating characteristic","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27959999442100525},{"id":"https://openalex.org/C3020225094","wikidata":"https://www.wikidata.org/wiki/Q80091","display_name":"Area under curve","level":3,"score":0.2791000008583069},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C501734568","wikidata":"https://www.wikidata.org/wiki/Q42918","display_name":"Mutation","level":3,"score":0.27250000834465027},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3768322.3769033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3768322.3769033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3768322.3769033","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3768322.3769033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3768322.3769033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3768322.3769033","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4114661379","display_name":null,"funder_award_id":"W911NF-20-2-0277","funder_id":"https://openalex.org/F4320338295","funder_display_name":"Army Research Laboratory"},{"id":"https://openalex.org/G4646999951","display_name":null,"funder_award_id":"2U54MD007597","funder_id":"https://openalex.org/F4320337534","funder_display_name":"National Institute on Minority Health and Health Disparities"},{"id":"https://openalex.org/G4708023126","display_name":null,"funder_award_id":"2U54MD007597","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G5259331294","display_name":null,"funder_award_id":"W911NF","funder_id":"https://openalex.org/F4320338295","funder_display_name":"Army Research Laboratory"},{"id":"https://openalex.org/G699435557","display_name":null,"funder_award_id":"2000296","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8971252212","display_name":"Targeted Infusion Project: Advancing Computational Biology Training for Undergraduate Students at Howard University","funder_award_id":"2406155","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337534","display_name":"National Institute on Minority Health and Health Disparities","ror":"https://ror.org/0493hgw16"},{"id":"https://openalex.org/F4320338295","display_name":"Army Research Laboratory","ror":"https://ror.org/011hc8f90"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4417062420.pdf"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W2003810144","https://openalex.org/W2017980560","https://openalex.org/W2120949690","https://openalex.org/W2411557417","https://openalex.org/W2889930091","https://openalex.org/W4310069568","https://openalex.org/W4364381578","https://openalex.org/W4379600237","https://openalex.org/W4387899477","https://openalex.org/W4389608298","https://openalex.org/W4389819097","https://openalex.org/W4402826776","https://openalex.org/W4403541500","https://openalex.org/W4406167408","https://openalex.org/W4412734078","https://openalex.org/W4413376900"],"related_works":[],"abstract_inverted_index":{"Accurately":[0],"predicting":[1],"protein":[2,18,30],"stability":[3,75,151],"changes":[4],"(\u0394\u0394G)":[5],"upon":[6],"amino":[7],"acid":[8],"substitutions":[9],"is":[10],"essential":[11],"for":[12,69,189],"understanding":[13],"disease":[14],"mechanisms":[15],"and":[16,43,45,91,107,124,150],"guiding":[17],"engineering.":[19],"While":[20],"recent":[21],"deep":[22,183],"learning":[23,184],"frameworks":[24],"can":[25,177],"extract":[26],"representations":[27],"directly":[28],"from":[29,58,86],"sequences":[31],"or":[32],"structures,":[33],"it":[34],"remains":[35],"unclear":[36],"which":[37],"sequence-derived":[38],"features":[39,118,138,152],"are":[40],"most":[41,129],"predictive":[42,194],"interpretable,":[44],"how":[46],"they":[47],"may":[48],"complement":[49],"large":[50],"embeddings.":[51],"Here,":[52],"we":[53,82],"systematically":[54],"evaluated":[55],"566":[56],"descriptors":[57],"the":[59,73,113,128,134,187],"AAIndex":[60],"database":[61],"to":[62,88,133,141,148],"identify":[63],"compact,":[64],"biochemically":[65],"meaningful":[66],"feature":[67,168],"subsets":[68],"\u0394\u0394G":[70,199],"prediction.":[71,200],"Using":[72],"side-chain":[74],"contribution":[76],"(S3)":[77],"dataset":[78],"as":[79,127],"a":[80,94,144,165],"benchmark,":[81],"tested":[83],"sliding":[84],"windows":[85],"5":[87],"21":[89],"residues":[90],"found":[92],"that":[93,170,192],"13-residue":[95],"window":[96],"consistently":[97],"optimized":[98],"performance":[99,158],"across":[100],"models.":[101],"Random":[102],"Forest":[103],"(ROC-AUC":[104,109],"=":[105,110,155],"0.787)":[106],"XGBoost":[108],"0.776)":[111],"achieved":[112,153],"best":[114],"baseline":[115],"results.":[116],"Grouping":[117],"into":[119],"categories":[120],"revealed":[121],"hydrophobicity,":[122],"physicochemical,":[123],"stability-related":[125],"indices":[126],"informative.":[130],"Restricting":[131],"models":[132],"top":[135],"20\u201330":[136],"ranked":[137],"improved":[139],"AUC":[140,154],"0.820,":[142],"while":[143],"minimal":[145],"subset":[146],"limited":[147],"hydrophobicity":[149],"0.813,":[156],"maintaining":[157],"with":[159,182,196],"enhanced":[160],"interpretability.":[161],"These":[162],"findings":[163],"establish":[164],"concise,":[166],"robust":[167],"panel":[169],"not":[171],"only":[172],"provides":[173],"mechanistic":[174],"insights":[175],"but":[176],"also":[178],"be":[179],"seamlessly":[180],"integrated":[181],"embeddings,":[185],"paving":[186],"way":[188],"hybrid":[190],"approaches":[191],"combine":[193],"power":[195],"interpretability":[197],"in":[198]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-12-05T00:00:00"}
