{"id":"https://openalex.org/W4414495425","doi":"https://doi.org/10.1021/acs.jcim.5c01544","title":"Benchmarking Machine Learning Models for HIV-1 Protease Inhibitor Resistance Prediction: Impact of Data Set Construction and Feature Representation","display_name":"Benchmarking Machine Learning Models for HIV-1 Protease Inhibitor Resistance Prediction: Impact of Data Set Construction and Feature Representation","publication_year":2025,"publication_date":"2025-09-25","ids":{"openalex":"https://openalex.org/W4414495425","doi":"https://doi.org/10.1021/acs.jcim.5c01544","pmid":"https://pubmed.ncbi.nlm.nih.gov/40997769"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.5c01544","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c01544","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c01544?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c01544?ref=article_openPDF","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044017658","display_name":"Roc\u00edo Luc\u00eda Beatriz Riveros Maidana","orcid":"https://orcid.org/0000-0003-3971-6447"},"institutions":[{"id":"https://openalex.org/I4210106529","display_name":"Agriaquaculture Nutritional Genomic Center","ror":"https://ror.org/01k89ak47","country_code":"CL","type":"facility","lineage":["https://openalex.org/I4210106529"]},{"id":"https://openalex.org/I4210157569","display_name":"Centro Universit\u00e1rio Internacional","ror":"https://ror.org/04j9vr008","country_code":"MX","type":"education","lineage":["https://openalex.org/I4210157569"]},{"id":"https://openalex.org/I52325","display_name":"Funda\u00e7\u00e3o Oswaldo Cruz","ror":"https://ror.org/04jhswv08","country_code":"BR","type":"facility","lineage":["https://openalex.org/I52325"]}],"countries":["BR","CL","MX"],"is_corresponding":false,"raw_author_name":"Roc\u00edo Luc\u00eda Beatriz Riveros Maidana","raw_affiliation_strings":["Instituto Oswaldo Cruz/Fiocruz","Laborat\u00f3rio de Gen\u00f3mica Aplicada e Bioinovac\u0328\u00f5es","Programa de P\u00f3s-Graduac\u0328\u00e3o em Biologia Computacional e Sistemas"],"raw_orcid":"https://orcid.org/0000-0003-3971-6447","affiliations":[{"raw_affiliation_string":"Instituto Oswaldo Cruz/Fiocruz","institution_ids":["https://openalex.org/I52325"]},{"raw_affiliation_string":"Laborat\u00f3rio de Gen\u00f3mica Aplicada e Bioinovac\u0328\u00f5es","institution_ids":["https://openalex.org/I4210106529"]},{"raw_affiliation_string":"Programa de P\u00f3s-Graduac\u0328\u00e3o em Biologia Computacional e Sistemas","institution_ids":["https://openalex.org/I4210157569"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035682634","display_name":"Lucas de Almeida Machado","orcid":"https://orcid.org/0000-0002-6575-1687"},"institutions":[{"id":"https://openalex.org/I4210144847","display_name":"Angiologica (Italy)","ror":"https://ror.org/058bza610","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210144847"]},{"id":"https://openalex.org/I4210153839","display_name":"National Institute of Science and Technology of Drugs and Medicines","ror":"https://ror.org/053efsj20","country_code":"BR","type":"facility","lineage":["https://openalex.org/I11385950","https://openalex.org/I4210127675","https://openalex.org/I4210151455","https://openalex.org/I4210153839"]}],"countries":["BR","IT"],"is_corresponding":true,"raw_author_name":"Lucas de Almeida Machado","raw_affiliation_strings":["Institute of Technology on Immunobiologicals (Bio-Manguinhos) - Fiocruz","Institute of Technology on Immunobiologicals (Bio-Manguinhos) - Fiocruz, Rio de Janeiro 21040-900, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-6575-1687","affiliations":[{"raw_affiliation_string":"Institute of Technology on Immunobiologicals (Bio-Manguinhos) - Fiocruz","institution_ids":["https://openalex.org/I4210144847"]},{"raw_affiliation_string":"Institute of Technology on Immunobiologicals (Bio-Manguinhos) - Fiocruz, Rio de Janeiro 21040-900, Brazil","institution_ids":["https://openalex.org/I4210153839"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019125482","display_name":"Ana Carolina Ramos Guimar\u00e3es","orcid":"https://orcid.org/0000-0003-1260-543X"},"institutions":[{"id":"https://openalex.org/I4210106529","display_name":"Agriaquaculture Nutritional Genomic Center","ror":"https://ror.org/01k89ak47","country_code":"CL","type":"facility","lineage":["https://openalex.org/I4210106529"]},{"id":"https://openalex.org/I4210157569","display_name":"Centro Universit\u00e1rio Internacional","ror":"https://ror.org/04j9vr008","country_code":"MX","type":"education","lineage":["https://openalex.org/I4210157569"]},{"id":"https://openalex.org/I52325","display_name":"Funda\u00e7\u00e3o Oswaldo Cruz","ror":"https://ror.org/04jhswv08","country_code":"BR","type":"facility","lineage":["https://openalex.org/I52325"]}],"countries":["BR","CL","MX"],"is_corresponding":false,"raw_author_name":"Ana Carolina Ramos Guimar\u00e3es","raw_affiliation_strings":["Instituto Oswaldo Cruz/Fiocruz","Laborat\u00f3rio de Gen\u00f3mica Aplicada e Bioinovac\u0328\u00f5es","Programa de P\u00f3s-Graduac\u0328\u00e3o em Biologia Computacional e Sistemas"],"raw_orcid":"https://orcid.org/0000-0003-1260-543X","affiliations":[{"raw_affiliation_string":"Instituto Oswaldo Cruz/Fiocruz","institution_ids":["https://openalex.org/I52325"]},{"raw_affiliation_string":"Laborat\u00f3rio de Gen\u00f3mica Aplicada e Bioinovac\u0328\u00f5es","institution_ids":["https://openalex.org/I4210106529"]},{"raw_affiliation_string":"Programa de P\u00f3s-Graduac\u0328\u00e3o em Biologia Computacional e Sistemas","institution_ids":["https://openalex.org/I4210157569"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035682634"],"corresponding_institution_ids":["https://openalex.org/I4210144847","https://openalex.org/I4210153839"],"apc_list":null,"apc_paid":null,"fwci":2.9332,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.91551303,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"65","issue":"19","first_page":"10037","last_page":"10053"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10526","display_name":"HIV/AIDS drug development and treatment","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10526","display_name":"HIV/AIDS drug development and treatment","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10112","display_name":"HIV Research and Treatment","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2406","display_name":"Virology"},"field":{"id":"https://openalex.org/fields/24","display_name":"Immunology and Microbiology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10151","display_name":"Hepatitis C virus research","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/2721","display_name":"Hepatology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5889999866485596},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5615000128746033},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5275999903678894},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.5220000147819519},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.510699987411499},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5060999989509583},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.44850000739097595},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4293000102043152},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.38920000195503235},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.37959998846054077}],"concepts":[{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7803999781608582},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7583000063896179},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6444000005722046},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5889999866485596},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5615000128746033},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5275999903678894},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.5220000147819519},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.510699987411499},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5060999989509583},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.44850000739097595},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4293000102043152},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41019999980926514},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.38920000195503235},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.36329999566078186},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.3513000011444092},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.34709998965263367},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.33480000495910645},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.29030001163482666},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C2781143361","wikidata":"https://www.wikidata.org/wiki/Q55761529","display_name":"Protease inhibitor (pharmacology)","level":5,"score":0.28139999508857727},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2515000104904175}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015497","descriptor_name":"HIV-1","qualifier_ui":"Q000187","qualifier_name":"drug effects","is_major_topic":true},{"descriptor_ui":"D015497","descriptor_name":"HIV-1","qualifier_ui":"Q000187","qualifier_name":"drug effects","is_major_topic":true},{"descriptor_ui":"D015497","descriptor_name":"HIV-1","qualifier_ui":"Q000187","qualifier_name":"drug effects","is_major_topic":true},{"descriptor_ui":"D015497","descriptor_name":"HIV-1","qualifier_ui":"Q000201","qualifier_name":"enzymology","is_major_topic":true},{"descriptor_ui":"D015497","descriptor_name":"HIV-1","qualifier_ui":"Q000201","qualifier_name":"enzymology","is_major_topic":true},{"descriptor_ui":"D015497","descriptor_name":"HIV-1","qualifier_ui":"Q000201","qualifier_name":"enzymology","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D016333","descriptor_name":"HIV Protease","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017320","descriptor_name":"HIV Protease Inhibitors","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":true},{"descriptor_ui":"D017320","descriptor_name":"HIV Protease Inhibitors","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":true},{"descriptor_ui":"D017320","descriptor_name":"HIV Protease Inhibitors","qualifier_ui":"Q000494","qualifier_name":"pharmacology","is_major_topic":true},{"descriptor_ui":"D017320","descriptor_name":"HIV Protease Inhibitors","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D017320","descriptor_name":"HIV Protease Inhibitors","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D017320","descriptor_name":"HIV Protease Inhibitors","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D019985","descriptor_name":"Benchmarking","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019985","descriptor_name":"Benchmarking","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019985","descriptor_name":"Benchmarking","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D024882","descriptor_name":"Drug Resistance, Viral","qualifier_ui":"Q000187","qualifier_name":"drug effects","is_major_topic":true},{"descriptor_ui":"D024882","descriptor_name":"Drug Resistance, Viral","qualifier_ui":"Q000187","qualifier_name":"drug effects","is_major_topic":true},{"descriptor_ui":"D024882","descriptor_name":"Drug Resistance, Viral","qualifier_ui":"Q000187","qualifier_name":"drug effects","is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1021/acs.jcim.5c01544","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c01544","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c01544?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:40997769","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40997769","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:europepmc.org:11337881","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12529765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1021/acs.jcim.5c01544","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c01544","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c01544?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1840770272","display_name":null,"funder_award_id":"E-26/","funder_id":"https://openalex.org/F4320322749","funder_display_name":"Funda\u00e7\u00e3o Carlos Chagas Filho de Amparo \u00e0 Pesquisa do Estado do Rio de Janeiro"},{"id":"https://openalex.org/G3325778992","display_name":null,"funder_award_id":"E-26/201","funder_id":"https://openalex.org/F4320322749","funder_display_name":"Funda\u00e7\u00e3o Carlos Chagas Filho de Amparo \u00e0 Pesquisa do Estado do Rio de Janeiro"}],"funders":[{"id":"https://openalex.org/F4320321091","display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","ror":"https://ror.org/00x0ma614"},{"id":"https://openalex.org/F4320322749","display_name":"Funda\u00e7\u00e3o Carlos Chagas Filho de Amparo \u00e0 Pesquisa do Estado do Rio de Janeiro","ror":"https://ror.org/03kk0s825"},{"id":"https://openalex.org/F4320323917","display_name":"Funda\u00e7\u00e3o Oswaldo Cruz","ror":"https://ror.org/04jhswv08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4414495425.pdf","grobid_xml":"https://content.openalex.org/works/W4414495425.grobid-xml"},"referenced_works_count":80,"referenced_works":["https://openalex.org/W1895692106","https://openalex.org/W1965865659","https://openalex.org/W1981669483","https://openalex.org/W1983479017","https://openalex.org/W1988215875","https://openalex.org/W1998654001","https://openalex.org/W2004800530","https://openalex.org/W2021733025","https://openalex.org/W2024213882","https://openalex.org/W2040600082","https://openalex.org/W2054688372","https://openalex.org/W2096451472","https://openalex.org/W2101247921","https://openalex.org/W2106374999","https://openalex.org/W2128685318","https://openalex.org/W2131046434","https://openalex.org/W2136431383","https://openalex.org/W2140227530","https://openalex.org/W2143210482","https://openalex.org/W2151581834","https://openalex.org/W2154164013","https://openalex.org/W2154290668","https://openalex.org/W2154669635","https://openalex.org/W2164443267","https://openalex.org/W2169192079","https://openalex.org/W2288331077","https://openalex.org/W2502949459","https://openalex.org/W2507538451","https://openalex.org/W2509030308","https://openalex.org/W2564554996","https://openalex.org/W2606439133","https://openalex.org/W2623889092","https://openalex.org/W2747271593","https://openalex.org/W2793642174","https://openalex.org/W2892113269","https://openalex.org/W2898110234","https://openalex.org/W2924309353","https://openalex.org/W2945976633","https://openalex.org/W2966230873","https://openalex.org/W2978293727","https://openalex.org/W2994317626","https://openalex.org/W2997793257","https://openalex.org/W3023624884","https://openalex.org/W3027658738","https://openalex.org/W3103156673","https://openalex.org/W3105010596","https://openalex.org/W3157429049","https://openalex.org/W3160331490","https://openalex.org/W3191439782","https://openalex.org/W3196426758","https://openalex.org/W3202242963","https://openalex.org/W4210887158","https://openalex.org/W4213022372","https://openalex.org/W4220807831","https://openalex.org/W4283781398","https://openalex.org/W4293221296","https://openalex.org/W4296604417","https://openalex.org/W4297914773","https://openalex.org/W4310715387","https://openalex.org/W4313910025","https://openalex.org/W4320058779","https://openalex.org/W4321191146","https://openalex.org/W4328121283","https://openalex.org/W4388636888","https://openalex.org/W4391594606","https://openalex.org/W4391594622","https://openalex.org/W4391825813","https://openalex.org/W4392796683","https://openalex.org/W4399141936","https://openalex.org/W4400579407","https://openalex.org/W4400579545","https://openalex.org/W4400579662","https://openalex.org/W4400654257","https://openalex.org/W4400677486","https://openalex.org/W4401046611","https://openalex.org/W4401509101","https://openalex.org/W4405264070","https://openalex.org/W4406253786","https://openalex.org/W4406848094","https://openalex.org/W4407007783"],"related_works":[],"abstract_inverted_index":{"High":[0],"Resolution":[1],"Image":[2],"Download":[3],"MS":[4],"PowerPoint":[5],"Slide":[6],"The":[7,280],"rapid":[8],"emergence":[9],"of":[10,25,140,195,200,217,267],"drug":[11,41,359],"resistance":[12,42,75,278,285,346],"in":[13,309],"viral":[14],"infections":[15],"represents":[16],"a":[17,51,93,213],"significant":[18],"global":[19],"health":[20],"challenge,":[21],"threatening":[22],"the":[23,253,296],"efficacy":[24],"treatments":[26],"for":[27,38,96,343,355],"multiple":[28,68],"diseases.":[29],"Machine":[30],"learning":[31,160],"models":[32,77,161,226],"have":[33],"emerged":[34],"as":[35,50],"valuable":[36],"tools":[37],"predicting":[39],"antiviral":[40],"from":[43,203],"genomic":[44],"data,":[45],"with":[46,83,107,121,137],"HIV-1":[47,71,344],"protease":[48,72],"serving":[49],"well-characterized":[52],"model":[53,218,256,327],"system":[54],"due":[55],"to":[56,230,265],"its":[57],"extensive":[58],"experimental":[59],"data":[60,81,101,115,131,197,320],"and":[61,86,91,128,154,164,167,293,351,358],"clinical":[62,356],"relevance.":[63],"Here,":[64],"we":[65],"systematically":[66],"evaluate":[67],"previously":[69],"published":[70],"inhibitor":[73],"(PI)":[74],"prediction":[76],"across":[78,304],"three":[79],"distinct":[80,276],"sets":[82,237],"different":[84],"preprocessing":[85,181],"ambiguous":[87,111,126,141],"sequencing":[88],"processing":[89],"strategies":[90],"propose":[92],"new":[94],"approach":[95,211],"preprocessing.":[97],"We":[98,143],"tested":[99],"Steiner\u2019s":[100],"set":[102,116,132,198,321],"(":[103,117,133],"n":[104,118,134],"=":[105,119,135,241,246],"1540)":[106],"first-amino-acid":[108],"selection":[109],"at":[110,125,287],"positions,":[112,127],"Shen\u2019s":[113],"expanded":[114,196],"500,390)":[120],"all":[122],"possible":[123],"combinations":[124],"our":[129,208,221],"In-house":[130],"869)":[136],"strict":[138],"exclusion":[139],"sequences.":[142],"compare":[144],"neural":[145,232,341],"networks":[146,233,303,342],"architectures":[147],"(Multilayer":[148],"Perceptron,":[149],"Bidirectional":[150],"Recurrent":[151],"Neural":[152,156],"Network,":[153],"Convolutional":[155],"Network),":[157],"traditional":[158],"machine":[159],"(Random":[162],"Forest":[163],"K-Nearest":[165],"Neighbor),":[166],"logistic":[168,224],"regression":[169,225],"using":[170],"either":[171],"zScales":[172,254,281],"physicochemical":[173,333],"descriptors":[174,282],"or":[175,338],"Rosetta":[176,243,268,297],"energy":[177,298],"terms.":[178],"Sequence":[179],"expansion":[180],"can":[182,336],"artificially":[183],"increase":[184],"performance":[185,228,328],"metrics":[186],"(mean":[187],"AUC:":[188],"0.986\u20130.999)":[189],"by":[190],"creating":[191],"substantial":[192],"redundancy":[193],"(99.6%":[194],"consists":[199],"duplicated":[201],"sequences":[202],"2096":[204],"unique":[205],"originals),":[206],"while":[207,248,295,329],"clustering-based":[209],"validation":[210],"provides":[212],"more":[214],"stringent":[215],"assessment":[216],"generalizability.":[219],"Remarkably,":[220],"physicochemically":[222],"informed":[223],"achieved":[227],"comparable":[229],"complex":[231,340],"on":[234],"challenging":[235],"test":[236],"(zScales":[238],"LR:":[239,244],"AUC":[240,245],"0.973;":[242],"0.944),":[247],"offering":[249,348],"superior":[250],"interpretability.":[251],"Furthermore,":[252],"LR":[255,269],"offered":[257],"significantly":[258],"greater":[259],"computational":[260],"efficiency":[261],"(0.007":[262],"s/prediction)":[263],"compared":[264],"that":[266,331],"(776.117":[270],"s/prediction).":[271],"Mutual":[272],"information":[273],"analysis":[274],"revealed":[275,300],"complementary":[277],"mechanisms:":[279],"identified":[283],"discrete":[284],"hotspots":[286],"positions":[288],"10,":[289],"46,":[290],"54,":[291],"71,":[292],"90,":[294],"terms":[299],"interconnected":[301],"energetic":[302],"structurally":[305],"adjacent":[306],"residues,":[307],"particularly":[308],"functionally":[310],"critical":[311,354],"flap":[312],"regions":[313],"(positions":[314],"46\u201354).":[315],"This":[316],"study":[317],"demonstrates":[318],"how":[319],"construction":[322],"choices":[323],"directly":[324],"impact":[325],"apparent":[326],"establishing":[330],"well-chosen":[332],"feature":[334],"representations":[335],"match":[337],"exceed":[339],"PI":[345],"modeling,":[347],"both":[349],"accuracy":[350],"mechanistic":[352],"interpretability":[353],"implementation":[357],"development.":[360]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-21T07:57:09.225873","created_date":"2025-09-25T00:00:00"}
