{"id":"https://openalex.org/W4415279744","doi":"https://doi.org/10.1109/tcbbio.2025.3621138","title":"Nearest Neighbor CCP-Based Molecular Sequence Analysis","display_name":"Nearest Neighbor CCP-Based Molecular Sequence Analysis","publication_year":2025,"publication_date":"2025-10-15","ids":{"openalex":"https://openalex.org/W4415279744","doi":"https://doi.org/10.1109/tcbbio.2025.3621138","pmid":"https://pubmed.ncbi.nlm.nih.gov/41091606"},"language":"en","primary_location":{"id":"doi:10.1109/tcbbio.2025.3621138","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcbbio.2025.3621138","pdf_url":null,"source":{"id":"https://openalex.org/S5407042751","display_name":"IEEE Transactions on Computational Biology and Bioinformatics","issn_l":"2998-4165","issn":["2998-4165"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computational Biology and Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064858842","display_name":"Sarwan Ali","orcid":"https://orcid.org/0000-0001-8121-2168"},"institutions":[{"id":"https://openalex.org/I2799503643","display_name":"Columbia University Irving Medical Center","ror":"https://ror.org/01esghr10","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2799503643"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sarwan Ali","raw_affiliation_strings":["Irving Medical Center, Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Irving Medical Center, Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I2799503643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017366862","display_name":"Prakash Chourasia","orcid":"https://orcid.org/0000-0002-1443-2192"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prakash Chourasia","raw_affiliation_strings":["Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085735278","display_name":"Binod Prasad Koirala","orcid":"https://orcid.org/0000-0002-8492-220X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bipin Koirala","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026228482","display_name":"Murray Patterson","orcid":"https://orcid.org/0000-0002-4329-0234"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Murray Patterson","raw_affiliation_strings":["Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5064858842"],"corresponding_institution_ids":["https://openalex.org/I2799503643"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32283018,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"23","issue":"1","first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10908","display_name":"Analytical Chemistry and Chromatography","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9690999984741211,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7078999876976013},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6362000107765198},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6223999857902527},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5931000113487244},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.5619999766349792},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5386999845504761},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.42559999227523804},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.423799991607666}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7078999876976013},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6362000107765198},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6223999857902527},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5931000113487244},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.5619999766349792},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5426999926567078},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5386999845504761},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47870001196861267},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.42559999227523804},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.423799991607666},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.40049999952316284},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3921000063419342},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3898000121116638},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.3840999901294708},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.36410000920295715},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34860000014305115},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.34150001406669617},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3366999924182892},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.3239000141620636},{"id":"https://openalex.org/C10010492","wikidata":"https://www.wikidata.org/wiki/Q3142557","display_name":"Protein sequencing","level":4,"score":0.32089999318122864},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3188999891281128},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C61053724","wikidata":"https://www.wikidata.org/wiki/Q1154615","display_name":"Sequence analysis","level":3,"score":0.31060001254081726},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2551000118255615}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D020539","descriptor_name":"Sequence Analysis, Protein","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D020539","descriptor_name":"Sequence Analysis, Protein","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1109/tcbbio.2025.3621138","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcbbio.2025.3621138","pdf_url":null,"source":{"id":"https://openalex.org/S5407042751","display_name":"IEEE Transactions on Computational Biology and Bioinformatics","issn_l":"2998-4165","issn":["2998-4165"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computational Biology and Bioinformatics","raw_type":"journal-article"},{"id":"pmid:41091606","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41091606","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on computational biology and bioinformatics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W85047175","https://openalex.org/W2021210077","https://openalex.org/W2045204781","https://openalex.org/W2054655136","https://openalex.org/W2079361215","https://openalex.org/W2097463865","https://openalex.org/W2109363337","https://openalex.org/W2121122425","https://openalex.org/W2121784059","https://openalex.org/W2131987814","https://openalex.org/W2134312057","https://openalex.org/W2140095548","https://openalex.org/W2770645414","https://openalex.org/W2951506174","https://openalex.org/W2951527381","https://openalex.org/W2963585318","https://openalex.org/W2995514860","https://openalex.org/W3005127469","https://openalex.org/W3005235420","https://openalex.org/W3087224093","https://openalex.org/W3176294083","https://openalex.org/W3198971816","https://openalex.org/W4205773061","https://openalex.org/W4220693339","https://openalex.org/W4295759516","https://openalex.org/W4387425124","https://openalex.org/W4402327530","https://openalex.org/W4405367486","https://openalex.org/W4407693558"],"related_works":[],"abstract_inverted_index":{"Molecular":[0],"sequence":[1,84,115,173],"analysis":[2],"is":[3,93],"crucial":[4],"for":[5,70,83,89,111],"understanding":[6],"several":[7],"biological":[8,71],"processes,":[9],"including":[10],"protein-protein":[11],"interactions,":[12],"functional":[13],"annotation,":[14],"and":[15,23,40,50,60,107,122,160,177,197],"disease":[16],"classification.":[17],"The":[18,74],"large":[19],"number":[20],"of":[21,28,47,129,152,184],"sequences":[22,92,121],"the":[24,45,157,162,182],"inherently":[25],"complicated":[26],"nature":[27],"protein":[29],"structures":[30],"make":[31],"it":[32,145],"challenging":[33],"to":[34,134,149,180],"analyze":[35],"such":[36],"data.":[37,73,116],"Finding":[38],"patterns":[39],"enhancing":[41],"subsequent":[42],"research":[43],"requires":[44],"use":[46,128],"dimensionality":[48],"reduction":[49],"feature":[51],"selection":[52],"approaches.":[53],"Recently,":[54],"a":[55,103,150,165,171],"method":[56,69],"called":[57],"Correlated":[58,105],"Clustering":[59,106],"Projection":[61,108],"(CCP)":[62],"has":[63],"been":[64],"proposed":[65,186],"as":[66],"an":[67],"effective":[68],"sequencing":[72],"CCP":[75,126,137,176,200],"technique":[76,110],"remains":[77],"computationally":[78],"expensive,":[79],"despite":[80],"its":[81,87],"effectiveness":[82],"visualization.":[85],"Furthermore,":[86],"utility":[88],"classifying":[90],"molecular":[91,114,120,172],"still":[94],"uncertain.":[95],"To":[96,117],"solve":[97],"these":[98],"two":[99],"problems,":[100],"we":[101],"present":[102],"Nearest-Neighbor":[104],"(CCP-NN)-based":[109],"efficiently":[112],"preprocessing":[113],"group":[118],"related":[119],"produce":[123],"representative":[124],"supersequences,":[125],"makes":[127],"sequence-to-sequence":[130],"correlations.":[131],"As":[132],"opposed":[133],"conventional":[135],"methods,":[136],"does":[138],"not":[139],"rely":[140],"on":[141],"matrix":[142],"diagonalization,":[143],"therefore,":[144],"can":[146],"be":[147],"applied":[148],"range":[151],"machine-learning":[153],"problems.":[154],"We":[155,169],"estimate":[156],"density":[158],"map":[159],"compute":[161],"correlation":[163],"using":[164,175],"nearest-neighbor":[166],"search":[167],"technique.":[168],"perform":[170],"classification":[174,195],"CCP-NN":[178,192],"representations":[179],"assess":[181],"efficacy":[183],"our":[185],"approach.":[187],"Our":[188],"findings":[189],"show":[190],"that":[191],"considerably":[193],"improves":[194],"accuracy":[196],"significantly":[198],"outperforms":[199],"in":[201],"computational":[202],"runtime.":[203]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-17T00:00:00"}
