{"id":"https://openalex.org/W4410144829","doi":"https://doi.org/10.1007/s13721-026-00732-4","title":"Unraveling Protein Secrets: Machine Learning Unveils Novel Biologically Significant Associations Among Amino Acids","display_name":"Unraveling Protein Secrets: Machine Learning Unveils Novel Biologically Significant Associations Among Amino Acids","publication_year":2025,"publication_date":"2025-05-06","ids":{"openalex":"https://openalex.org/W4410144829","doi":"https://doi.org/10.1007/s13721-026-00732-4"},"language":"en","primary_location":{"id":"doi:10.1007/s13721-026-00732-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13721-026-00732-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s13721-026-00732-4.pdf","source":{"id":"https://openalex.org/S4210178532","display_name":"Network Modeling Analysis in Health Informatics and Bioinformatics","issn_l":"2192-6662","issn":["2192-6662","2192-6670"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310321666","host_organization_name":"Springer Vienna","host_organization_lineage":["https://openalex.org/P4310321666","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Vienna","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Network Modeling Analysis in Health Informatics and Bioinformatics","raw_type":"journal-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s13721-026-00732-4.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091216658","display_name":"Samuel Kakraba","orcid":"https://orcid.org/0000-0002-6362-5126"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Samuel Kakraba","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0002-6362-5126","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016430575","display_name":"Aayire C. Yadem","orcid":"https://orcid.org/0000-0002-4923-7792"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aayire Clement Yadem","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5026149377","display_name":"K. Thomas Abraham","orcid":"https://orcid.org/0009-0002-9269-2536"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuukua Egyinba Abraham","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5091216658"],"corresponding_institution_ids":[],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":3.1641,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.91435245,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"15","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9593999981880188,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/amino-acid","display_name":"Amino acid","score":0.5713744759559631},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.36674344539642334},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.35850241780281067},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.35463452339172363},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.34892022609710693},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.3148414194583893},{"id":"https://openalex.org/keywords/biochemistry","display_name":"Biochemistry","score":0.2387239933013916}],"concepts":[{"id":"https://openalex.org/C515207424","wikidata":"https://www.wikidata.org/wiki/Q8066","display_name":"Amino acid","level":2,"score":0.5713744759559631},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.36674344539642334},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.35850241780281067},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.35463452339172363},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.34892022609710693},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.3148414194583893},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.2387239933013916}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s13721-026-00732-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13721-026-00732-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s13721-026-00732-4.pdf","source":{"id":"https://openalex.org/S4210178532","display_name":"Network Modeling Analysis in Health Informatics and Bioinformatics","issn_l":"2192-6662","issn":["2192-6662","2192-6670"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310321666","host_organization_name":"Springer Vienna","host_organization_lineage":["https://openalex.org/P4310321666","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Vienna","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Network Modeling Analysis in Health Informatics and Bioinformatics","raw_type":"journal-article"},{"id":"doi:10.20944/preprints202505.0139.v1","is_oa":true,"landing_page_url":"https://doi.org/10.20944/preprints202505.0139.v1","pdf_url":"https://www.preprints.org/frontend/manuscript/d2a8e7f9e4bfc7425ed6563e643455b7/download_pub","source":{"id":"https://openalex.org/S6309402219","display_name":"Preprints.org","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"posted-content"}],"best_oa_location":{"id":"doi:10.1007/s13721-026-00732-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13721-026-00732-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s13721-026-00732-4.pdf","source":{"id":"https://openalex.org/S4210178532","display_name":"Network Modeling Analysis in Health Informatics and Bioinformatics","issn_l":"2192-6662","issn":["2192-6662","2192-6670"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310321666","host_organization_name":"Springer Vienna","host_organization_lineage":["https://openalex.org/P4310321666","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Vienna","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Network Modeling Analysis in Health Informatics and Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320310142","display_name":"Tulane University","ror":"https://ror.org/04vmvtb21"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410144829.pdf","grobid_xml":"https://content.openalex.org/works/W4410144829.grobid-xml"},"referenced_works_count":52,"referenced_works":["https://openalex.org/W1493454437","https://openalex.org/W1548779692","https://openalex.org/W1975304761","https://openalex.org/W1980704774","https://openalex.org/W1987971958","https://openalex.org/W2002611021","https://openalex.org/W2013225614","https://openalex.org/W2021975315","https://openalex.org/W2037263319","https://openalex.org/W2042655455","https://openalex.org/W2049631297","https://openalex.org/W2053138484","https://openalex.org/W2071949631","https://openalex.org/W2080796053","https://openalex.org/W2092142860","https://openalex.org/W2101375662","https://openalex.org/W2147209844","https://openalex.org/W2154042308","https://openalex.org/W2411934008","https://openalex.org/W2497383353","https://openalex.org/W2577956156","https://openalex.org/W2619144974","https://openalex.org/W2785884569","https://openalex.org/W2786126313","https://openalex.org/W2963237893","https://openalex.org/W2966766006","https://openalex.org/W2973263015","https://openalex.org/W2976578932","https://openalex.org/W2979935139","https://openalex.org/W2996511031","https://openalex.org/W3003257820","https://openalex.org/W3035965352","https://openalex.org/W3047776327","https://openalex.org/W3181135909","https://openalex.org/W3204859513","https://openalex.org/W4200436320","https://openalex.org/W4210282038","https://openalex.org/W4210282678","https://openalex.org/W4213345021","https://openalex.org/W4249920046","https://openalex.org/W4377115574","https://openalex.org/W4385407692","https://openalex.org/W4387843635","https://openalex.org/W4390660001","https://openalex.org/W4393111592","https://openalex.org/W4394576200","https://openalex.org/W4400953933","https://openalex.org/W4405991965","https://openalex.org/W4406434658","https://openalex.org/W4416257154","https://openalex.org/W4417022838","https://openalex.org/W4417035486"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W2948807893","https://openalex.org/W2899084033","https://openalex.org/W2778153218","https://openalex.org/W2748952813","https://openalex.org/W1531601525","https://openalex.org/W4391375266","https://openalex.org/W2078814861","https://openalex.org/W2527526854","https://openalex.org/W1976181487"],"abstract_inverted_index":{"Accurate":[0],"classification":[1],"of":[2,163],"amino":[3,177],"acids":[4],"is":[5],"fundamental":[6],"to":[7,42],"protein":[8,192],"engineering":[9],"and":[10,64,75,78,93,100,106,118,139,188],"structural":[11,144],"biology.":[12],"Traditional":[13],"schemes":[14],"often":[15,131],"rely":[16],"on":[17],"single-dimensional":[18],"properties,":[19],"potentially":[20],"overlooking":[21],"complex":[22],"structure\u2013function":[23],"relationships.":[24],"In":[25],"this":[26],"study,":[27],"we":[28,110],"present":[29],"an":[30],"automated,":[31],"AI-driven":[32],"hierarchical":[33],"clustering":[34],"pipeline":[35],"utilizing":[36],"22":[37],"novel":[38],"graph-theoretic":[39,169],"molecular":[40],"descriptors":[41,170],"uncover":[43],"high-dimensional":[44],"biochemical":[45],"associations.":[46],"Using":[47],"average":[48],"linkage":[49],"with":[50,146,157],"Manhattan":[51],"distance,":[52],"our":[53],"approach":[54],"achieved":[55],"a":[56,101,112,125,172,181],"high":[57],"cophenetic":[58],"correlation":[59],"(0.847),":[60],"significantly":[61],"outperforming":[62],"K-means":[63],"DBSCAN":[65],"in":[66,128],"both":[67],"cluster":[68],"quality":[69],"(Silhouette":[70],"score:":[71],"0.573":[72],"vs.":[73],"0.548":[74],"0.412,":[76],"respectively)":[77],"biological":[79],"interpretability.":[80],"The":[81],"analysis":[82,153],"revealed":[83],"two":[84],"dominant":[85],"clusters:":[86],"one":[87],"comprising":[88],"aromatic":[89,119],"(Trp,":[90],"Phe,":[91],"Tyr)":[92],"positively":[94],"charged":[95],"residues":[96,120],"(Arg,":[97],"His,":[98],"Lys),":[99],"second":[102],"encompassing":[103],"aliphatic,":[104],"polar,":[105],"acidic":[107],"residues.":[108],"Notably,":[109],"identified":[111],"robust,":[113],"high-stability":[114],"association":[115],"between":[116],"Arginine":[117],"(consensus":[121],">":[122],"0.85),":[123],"suggesting":[124],"functional":[126],"basis":[127],"cation\u2013\u03c0":[129],"interactions":[130],"missed":[132],"by":[133],"classical":[134],"hydrophobicity":[135],"scales.":[136],"Conversely,":[137],"Glycine":[138],"Proline":[140],"emerged":[141],"as":[142,171],"distinct":[143],"outliers":[145],"low":[147],"co-clustering":[148],"probabilities":[149],"(<":[150],"0.3).":[151],"Sensitivity":[152],"demonstrated":[154],"remarkable":[155],"robustness,":[156],"core":[158],"clusters":[159],"persisting":[160],"across":[161],"98.6%":[162],"descriptor":[164],"subsets.":[165],"These":[166],"findings":[167],"validate":[168],"powerful":[173],"tool":[174],"for":[175,184],"refining":[176],"acid":[178],"alphabets,":[179],"offering":[180],"robust":[182],"framework":[183],"predicting":[185],"mutation":[186],"effects":[187],"guiding":[189],"de":[190],"novo":[191],"design.":[193]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
