{"id":"https://openalex.org/W4399369918","doi":"https://doi.org/10.1021/acs.jcim.4c00625","title":"Identification of Family-Specific Features in Cas9 and Cas12 Proteins: A Machine Learning Approach Using Complete Protein Feature Spectrum","display_name":"Identification of Family-Specific Features in Cas9 and Cas12 Proteins: A Machine Learning Approach Using Complete Protein Feature Spectrum","publication_year":2024,"publication_date":"2024-06-05","ids":{"openalex":"https://openalex.org/W4399369918","doi":"https://doi.org/10.1021/acs.jcim.4c00625","pmid":"https://pubmed.ncbi.nlm.nih.gov/38838358"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.4c00625","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.4c00625","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020669947","display_name":"Sita Sirisha Madugula","orcid":"https://orcid.org/0000-0001-9944-117X"},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]},{"id":"https://openalex.org/I165139151","display_name":"University of North Texas Health Science Center","ror":"https://ror.org/05msxaq47","country_code":"US","type":"education","lineage":["https://openalex.org/I165139151"]},{"id":"https://openalex.org/I2802090120","display_name":"University of North Texas System","ror":"https://ror.org/03qbxj466","country_code":"US","type":"education","lineage":["https://openalex.org/I2802090120"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sita Sirisha Madugula","raw_affiliation_strings":["Department of Pharmaceutical Sciences, University of North Texas System College of Pharmacy, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States"],"raw_orcid":"https://orcid.org/0000-0001-9944-117X","affiliations":[{"raw_affiliation_string":"Department of Pharmaceutical Sciences, University of North Texas System College of Pharmacy, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States","institution_ids":["https://openalex.org/I123534392","https://openalex.org/I2802090120","https://openalex.org/I165139151"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093773269","display_name":"Pranav Pujar","orcid":"https://orcid.org/0009-0009-7962-3713"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pranav Pujar","raw_affiliation_strings":["Department of Industrial, Manufacturing and Systems Engineering, University of Texas at Arlington, 701 South Nedderman Drive, Arlington, Texas 76019, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Industrial, Manufacturing and Systems Engineering, University of Texas at Arlington, 701 South Nedderman Drive, Arlington, Texas 76019, United States","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093773270","display_name":"Bharani Nammi","orcid":null},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bharani Nammi","raw_affiliation_strings":["Department of Industrial, Manufacturing and Systems Engineering, University of Texas at Arlington, 701 South Nedderman Drive, Arlington, Texas 76019, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Industrial, Manufacturing and Systems Engineering, University of Texas at Arlington, 701 South Nedderman Drive, Arlington, Texas 76019, United States","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022486615","display_name":"Shouyi Wang","orcid":"https://orcid.org/0000-0001-6366-3619"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shouyi Wang","raw_affiliation_strings":["Department of Industrial, Manufacturing and Systems Engineering, University of Texas at Arlington, 701 South Nedderman Drive, Arlington, Texas 76019, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Industrial, Manufacturing and Systems Engineering, University of Texas at Arlington, 701 South Nedderman Drive, Arlington, Texas 76019, United States","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021010122","display_name":"Vindi M. Jayasinghe\u2010Arachchige","orcid":"https://orcid.org/0000-0002-5493-6328"},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]},{"id":"https://openalex.org/I165139151","display_name":"University of North Texas Health Science Center","ror":"https://ror.org/05msxaq47","country_code":"US","type":"education","lineage":["https://openalex.org/I165139151"]},{"id":"https://openalex.org/I2802090120","display_name":"University of North Texas System","ror":"https://ror.org/03qbxj466","country_code":"US","type":"education","lineage":["https://openalex.org/I2802090120"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vindi M. Jayasinghe-Arachchige","raw_affiliation_strings":["Department of Pharmaceutical Sciences, University of North Texas System College of Pharmacy, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States"],"raw_orcid":"https://orcid.org/0000-0002-5493-6328","affiliations":[{"raw_affiliation_string":"Department of Pharmaceutical Sciences, University of North Texas System College of Pharmacy, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States","institution_ids":["https://openalex.org/I123534392","https://openalex.org/I2802090120","https://openalex.org/I165139151"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101227232","display_name":"Tyler Pham","orcid":null},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]},{"id":"https://openalex.org/I165139151","display_name":"University of North Texas Health Science Center","ror":"https://ror.org/05msxaq47","country_code":"US","type":"education","lineage":["https://openalex.org/I165139151"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tyler Pham","raw_affiliation_strings":["School of Biomedical Sciences, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Biomedical Sciences, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States","institution_ids":["https://openalex.org/I123534392","https://openalex.org/I165139151"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093773271","display_name":"Dominic Mashburn","orcid":null},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]},{"id":"https://openalex.org/I165139151","display_name":"University of North Texas Health Science Center","ror":"https://ror.org/05msxaq47","country_code":"US","type":"education","lineage":["https://openalex.org/I165139151"]},{"id":"https://openalex.org/I2802090120","display_name":"University of North Texas System","ror":"https://ror.org/03qbxj466","country_code":"US","type":"education","lineage":["https://openalex.org/I2802090120"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dominic Mashburn","raw_affiliation_strings":["Department of Pharmaceutical Sciences, University of North Texas System College of Pharmacy, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Pharmaceutical Sciences, University of North Texas System College of Pharmacy, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States","institution_ids":["https://openalex.org/I123534392","https://openalex.org/I2802090120","https://openalex.org/I165139151"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085759915","display_name":"Maria Artiles","orcid":"https://orcid.org/0000-0001-7125-5113"},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]},{"id":"https://openalex.org/I165139151","display_name":"University of North Texas Health Science Center","ror":"https://ror.org/05msxaq47","country_code":"US","type":"education","lineage":["https://openalex.org/I165139151"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maria Artiles","raw_affiliation_strings":["School of Biomedical Sciences, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Biomedical Sciences, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States","institution_ids":["https://openalex.org/I123534392","https://openalex.org/I165139151"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100327035","display_name":"Jin Liu","orcid":"https://orcid.org/0000-0002-1067-4063"},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]},{"id":"https://openalex.org/I165139151","display_name":"University of North Texas Health Science Center","ror":"https://ror.org/05msxaq47","country_code":"US","type":"education","lineage":["https://openalex.org/I165139151"]},{"id":"https://openalex.org/I2802090120","display_name":"University of North Texas System","ror":"https://ror.org/03qbxj466","country_code":"US","type":"education","lineage":["https://openalex.org/I2802090120"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jin Liu","raw_affiliation_strings":["Department of Pharmaceutical Sciences, University of North Texas System College of Pharmacy, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States","School of Biomedical Sciences, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States"],"raw_orcid":"https://orcid.org/0000-0002-1067-4063","affiliations":[{"raw_affiliation_string":"Department of Pharmaceutical Sciences, University of North Texas System College of Pharmacy, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States","institution_ids":["https://openalex.org/I123534392","https://openalex.org/I2802090120","https://openalex.org/I165139151"]},{"raw_affiliation_string":"School of Biomedical Sciences, University of North Texas Health Science Center, 3500 Camp Bowie Blvd, Fort Worth, Texas 76107, United States","institution_ids":["https://openalex.org/I123534392","https://openalex.org/I165139151"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100327035"],"corresponding_institution_ids":["https://openalex.org/I123534392","https://openalex.org/I165139151","https://openalex.org/I2802090120"],"apc_list":null,"apc_paid":null,"fwci":1.2842,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.79036017,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"64","issue":"12","first_page":"4897","last_page":"4911"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10878","display_name":"CRISPR and Genetic Engineering","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5689426064491272},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.5658496022224426},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5519173741340637},{"id":"https://openalex.org/keywords/cas9","display_name":"Cas9","score":0.5121848583221436},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.4706506133079529},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4529976546764374},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3640199303627014},{"id":"https://openalex.org/keywords/crispr","display_name":"CRISPR","score":0.35240352153778076},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.334092378616333},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3204301595687866},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.2789439558982849}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5689426064491272},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5658496022224426},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5519173741340637},{"id":"https://openalex.org/C132455925","wikidata":"https://www.wikidata.org/wiki/Q16965677","display_name":"Cas9","level":4,"score":0.5121848583221436},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.4706506133079529},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4529976546764374},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3640199303627014},{"id":"https://openalex.org/C98108389","wikidata":"https://www.wikidata.org/wiki/Q412563","display_name":"CRISPR","level":3,"score":0.35240352153778076},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.334092378616333},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3204301595687866},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.2789439558982849}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D000076987","descriptor_name":"CRISPR-Associated Protein 9","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D064113","descriptor_name":"CRISPR-Cas Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D064113","descriptor_name":"CRISPR-Cas Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D064113","descriptor_name":"CRISPR-Cas Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D064113","descriptor_name":"CRISPR-Cas Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D064130","descriptor_name":"CRISPR-Associated Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D064130","descriptor_name":"CRISPR-Associated Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D064130","descriptor_name":"CRISPR-Associated Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D064130","descriptor_name":"CRISPR-Associated Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D064130","descriptor_name":"CRISPR-Associated Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D064130","descriptor_name":"CRISPR-Associated Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D064130","descriptor_name":"CRISPR-Associated Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D064130","descriptor_name":"CRISPR-Associated Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1021/acs.jcim.4c00625","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.4c00625","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:38838358","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38838358","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land"}],"awards":[{"id":"https://openalex.org/G3127572520","display_name":null,"funder_award_id":"R21GM144860","funder_id":"https://openalex.org/F4320337354","funder_display_name":"National Institute of General Medical Sciences"}],"funders":[{"id":"https://openalex.org/F4320337354","display_name":"National Institute of General Medical Sciences","ror":"https://ror.org/04q48ey07"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":78,"referenced_works":["https://openalex.org/W1849233805","https://openalex.org/W1852971431","https://openalex.org/W1984794455","https://openalex.org/W1988101725","https://openalex.org/W1989932358","https://openalex.org/W1990487336","https://openalex.org/W1993711987","https://openalex.org/W2012468406","https://openalex.org/W2014661586","https://openalex.org/W2042084565","https://openalex.org/W2045435533","https://openalex.org/W2047672715","https://openalex.org/W2064815984","https://openalex.org/W2094403468","https://openalex.org/W2107959600","https://openalex.org/W2118746294","https://openalex.org/W2124812694","https://openalex.org/W2127862779","https://openalex.org/W2129368756","https://openalex.org/W2130479394","https://openalex.org/W2132292391","https://openalex.org/W2134023754","https://openalex.org/W2137721714","https://openalex.org/W2142515819","https://openalex.org/W2150893872","https://openalex.org/W2153344788","https://openalex.org/W2153537688","https://openalex.org/W2156665896","https://openalex.org/W2164260969","https://openalex.org/W2164934988","https://openalex.org/W2166139447","https://openalex.org/W2168930979","https://openalex.org/W2169244146","https://openalex.org/W2182431911","https://openalex.org/W2199695484","https://openalex.org/W2224398381","https://openalex.org/W2399809721","https://openalex.org/W2409671300","https://openalex.org/W2556159813","https://openalex.org/W2587186776","https://openalex.org/W2623277795","https://openalex.org/W2761794675","https://openalex.org/W2767657905","https://openalex.org/W2793168264","https://openalex.org/W2887293266","https://openalex.org/W2896386549","https://openalex.org/W2900824674","https://openalex.org/W2925163882","https://openalex.org/W2950448148","https://openalex.org/W2974536971","https://openalex.org/W2982583112","https://openalex.org/W2995081665","https://openalex.org/W3000188741","https://openalex.org/W3004706073","https://openalex.org/W3004993637","https://openalex.org/W3012305724","https://openalex.org/W3025014177","https://openalex.org/W3034436467","https://openalex.org/W3036373230","https://openalex.org/W3048869844","https://openalex.org/W3083369815","https://openalex.org/W3085845198","https://openalex.org/W3104651685","https://openalex.org/W3135497523","https://openalex.org/W3181319568","https://openalex.org/W3188619241","https://openalex.org/W3189038709","https://openalex.org/W3198395019","https://openalex.org/W4207008342","https://openalex.org/W4221093864","https://openalex.org/W4247523580","https://openalex.org/W4292736630","https://openalex.org/W4294089543","https://openalex.org/W4308616064","https://openalex.org/W4309506674","https://openalex.org/W4311114767","https://openalex.org/W4327559086","https://openalex.org/W4376648037"],"related_works":["https://openalex.org/W3081777379","https://openalex.org/W2615218473","https://openalex.org/W3040709352","https://openalex.org/W2977624170","https://openalex.org/W4394740655","https://openalex.org/W2605577322","https://openalex.org/W2615346513","https://openalex.org/W4307138640","https://openalex.org/W4366132770","https://openalex.org/W2941456574"],"abstract_inverted_index":{"The":[0,15,183],"recent":[1],"development":[2,395],"of":[3,18,33,194,212,252,285,307,317,328,396],"CRISPR-Cas":[4,20],"technology":[5],"holds":[6],"promise":[7],"to":[8,71,85,99,129,214,271,299],"correct":[9],"gene-level":[10],"defects":[11],"for":[12,80,369],"genetic":[13],"diseases.":[14],"key":[16],"element":[17],"the":[19,23,31,87,91,101,113,140,171,177,205,235,248,253,286,303,308,315,318,333,346,394],"system":[21],"is":[22,68,83],"Cas":[24,41,74,92,156,378,397],"protein,":[25],"a":[26,62,69,190,337],"nuclease":[27],"that":[28,218,390],"can":[29,391],"edit":[30],"gene":[32,63],"interest":[34],"assisted":[35],"by":[36],"guide":[37,393],"RNA.":[38],"However,":[39],"these":[40],"proteins":[42,75,134,277,398],"suffer":[43],"from":[44,118,135,332],"inherent":[45],"limitations":[46],"such":[47],"as":[48,61],"large":[49],"size,":[50],"low":[51],"cleavage":[52,305,326],"efficiency,":[53],"and":[54,108,111,132,152,167,179,185,196,224,230,266,278,295,324,344,360,372],"off-target":[55,342],"effects,":[56],"hindering":[57],"their":[58,199],"widespread":[59],"application":[60],"editing":[64,78,401],"tool.":[65],"Therefore,":[66],"there":[67],"need":[70],"identify":[72,112],"novel":[73],"with":[76,106,380,399],"improved":[77,400],"properties,":[79],"which":[81,268],"it":[82],"necessary":[84],"understand":[86],"underlying":[88],"features":[89,104,114,365,385],"governing":[90],"families.":[93],"In":[94],"this":[95],"study,":[96],"we":[97,122,159,356],"aim":[98],"elucidate":[100],"unique":[102],"protein":[103,142],"associated":[105],"Cas9":[107,133,186,249,259,276,289,359],"Cas12":[109,131,184,236,361],"families":[110],"distinguishing":[115],"each":[116],"family":[117],"non-Cas":[119,136,168],"proteins.":[120,157,169],"Here,":[121],"built":[123,160],"Random":[124],"Forest":[125],"(RF)":[126],"binary":[127,187],"classifiers":[128,163],"distinguish":[130],"proteins,":[137],"respectively,":[138],"using":[139],"complete":[141],"feature":[143],"spectrum":[144],"(13,494":[145],"features)":[146],"encoding":[147],"various":[148],"physiochemical,":[149],"topological,":[150],"constitutional,":[151],"coevolutionary":[153],"information":[154],"on":[155,176,198],"Furthermore,":[158],"multiclass":[161,206],"RF":[162],"differentiating":[164],"Cas9,":[165],"Cas12,":[166],"All":[170],"models":[172,188],"were":[173],"evaluated":[174],"rigorously":[175],"test":[178],"independent":[180,201],"data":[181,202],"sets.":[182],"achieved":[189,208],"high":[191],"overall":[192],"accuracy":[193],"92%":[195],"95%":[197],"respective":[200],"sets,":[203],"while":[204,330],"classifier":[207],"an":[209],"F1":[210],"score":[211],"close":[213],"0.98.":[215],"We":[216],"observed":[217],"Quasi-Sequence-Order":[219],"(QSO)":[220],"descriptors":[221,226,256],"like":[222,227],"Schneider.lag":[223],"Composition":[225,241,245],"charge,":[228],"volume,":[229],"polarizability":[231],"are":[232,261,269,297],"predominant":[233],"in":[234,258,302,321,340,348],"family.":[237,250],"Conversely":[238],"Amino":[239],"Acid":[240],"descriptors,":[242],"especially":[243],"Tripeptide":[244],"(TPC),":[246],"predominate":[247],"Four":[251],"top":[254],"10":[255],"identified":[257,357],"classification":[260],"tripeptides":[262],"PWN,":[263],"PYY,":[264],"HHA,":[265],"DHI,":[267],"seen":[270],"be":[272,300],"conserved":[273],"across":[274],"all":[275],"located":[279],"within":[280],"different":[281],"catalytically":[282],"important":[283],"domains":[284],"Streptococcus":[287],"pyogenes":[288],"(SpCas9)":[290],"structure.":[291],"Among":[292],"these,":[293],"DHI":[294],"HHA":[296],"well-known":[298],"involved":[301],"DNA":[304,325],"activity":[306,327],"SpCas9":[309],"protein.":[310],"Mutation":[311],"studies":[312,374],"have":[313],"highlighted":[314],"significance":[316],"PWN":[319],"tripeptide":[320,335],"PAM":[322],"recognition":[323],"SpCas9,":[329],"Y450":[331],"PYY":[334],"plays":[336],"crucial":[338],"role":[339],"reducing":[341],"effects":[343],"improving":[345],"specificity":[347],"SpCas9.":[349],"Leveraging":[350],"our":[351],"machine":[352],"learning":[353],"(ML)":[354],"pipeline,":[355],"numerous":[358],"family-specific":[362],"features.":[363],"These":[364,384],"offer":[366],"valuable":[367],"insights":[368],"future":[370],"experimental":[371],"computational":[373],"aiming":[375],"at":[376],"designing":[377],"systems":[379],"enhanced":[381],"gene-editing":[382],"properties.":[383],"suggest":[386],"plausible":[387],"structural":[388],"modifications":[389],"effectively":[392],"capabilities.":[402]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-16T08:24:45.110214","created_date":"2025-10-10T00:00:00"}
