{"id":"https://openalex.org/W4409037278","doi":"https://doi.org/10.1515/jib-2024-0054","title":"Towards a more accurate and reliable evaluation of machine learning protein\u2013protein interaction prediction model performance in the presence of unavoidable dataset biases","display_name":"Towards a more accurate and reliable evaluation of machine learning protein\u2013protein interaction prediction model performance in the presence of unavoidable dataset biases","publication_year":2025,"publication_date":"2025-04-01","ids":{"openalex":"https://openalex.org/W4409037278","doi":"https://doi.org/10.1515/jib-2024-0054","pmid":"https://pubmed.ncbi.nlm.nih.gov/40165676"},"language":"en","primary_location":{"id":"doi:10.1515/jib-2024-0054","is_oa":true,"landing_page_url":"https://doi.org/10.1515/jib-2024-0054","pdf_url":"https://www.degruyterbrill.com/document/doi/10.1515/jib-2024-0054/pdf","source":{"id":"https://openalex.org/S4393917359","display_name":"Berichte aus der medizinischen Informatik und Bioinformatik/Journal of integrative bioinformatics","issn_l":"1432-4385","issn":["1432-4385","1613-4516"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Integrative Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://www.degruyterbrill.com/document/doi/10.1515/jib-2024-0054/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027776357","display_name":"Alba Nogueira-Rodr\u00edguez","orcid":"https://orcid.org/0000-0001-5991-7698"},"institutions":[{"id":"https://openalex.org/I4210091478","display_name":"Galicia Sur Biomedical Foundation","ror":"https://ror.org/00jdfsf63","country_code":"ES","type":"other","lineage":["https://openalex.org/I4210091478"]},{"id":"https://openalex.org/I4210158352","display_name":"Servicio Gallego de Salud","ror":"https://ror.org/0591s4t67","country_code":"ES","type":"healthcare","lineage":["https://openalex.org/I4210158352"]},{"id":"https://openalex.org/I4210158732","display_name":"i3S - Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade, Universidade do Porto","ror":"https://ror.org/04wjk1035","country_code":"PT","type":"facility","lineage":["https://openalex.org/I182534213","https://openalex.org/I4210158732"]}],"countries":["ES","PT"],"is_corresponding":true,"raw_author_name":"Alba Nogueira-Rodr\u00edguez","raw_affiliation_strings":["Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade (i3S) , Universidade do Porto , Rua Alfredo Allen 208, 4200-135 Porto , Portugal","SING Research Group, Galicia Sur Health Research Institute (IIS Galicia Sur), SERGAS-UVIGO , 36213 Vigo , Spain"],"raw_orcid":"https://orcid.org/0000-0001-5991-7698","affiliations":[{"raw_affiliation_string":"Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade (i3S) , Universidade do Porto , Rua Alfredo Allen 208, 4200-135 Porto , Portugal","institution_ids":["https://openalex.org/I4210158732"]},{"raw_affiliation_string":"SING Research Group, Galicia Sur Health Research Institute (IIS Galicia Sur), SERGAS-UVIGO , 36213 Vigo , Spain","institution_ids":["https://openalex.org/I4210091478","https://openalex.org/I4210158352"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044007257","display_name":"Daniel Glez\u2010Pe\u00f1a","orcid":"https://orcid.org/0000-0002-6129-7245"},"institutions":[{"id":"https://openalex.org/I4210091478","display_name":"Galicia Sur Biomedical Foundation","ror":"https://ror.org/00jdfsf63","country_code":"ES","type":"other","lineage":["https://openalex.org/I4210091478"]},{"id":"https://openalex.org/I4210158352","display_name":"Servicio Gallego de Salud","ror":"https://ror.org/0591s4t67","country_code":"ES","type":"healthcare","lineage":["https://openalex.org/I4210158352"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Daniel Glez-Pe\u00f1a","raw_affiliation_strings":["SING Research Group, Galicia Sur Health Research Institute (IIS Galicia Sur), SERGAS-UVIGO , 36213 Vigo , Spain"],"raw_orcid":"https://orcid.org/0000-0002-6129-7245","affiliations":[{"raw_affiliation_string":"SING Research Group, Galicia Sur Health Research Institute (IIS Galicia Sur), SERGAS-UVIGO , 36213 Vigo , Spain","institution_ids":["https://openalex.org/I4210091478","https://openalex.org/I4210158352"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081065000","display_name":"Cristina P. Vieira","orcid":"https://orcid.org/0000-0002-7139-2107"},"institutions":[{"id":"https://openalex.org/I4210158732","display_name":"i3S - Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade, Universidade do Porto","ror":"https://ror.org/04wjk1035","country_code":"PT","type":"facility","lineage":["https://openalex.org/I182534213","https://openalex.org/I4210158732"]},{"id":"https://openalex.org/I4387153674","display_name":"Instituto de Biologia Molecular e Celular","ror":"https://ror.org/005dkht93","country_code":null,"type":"education","lineage":["https://openalex.org/I182534213","https://openalex.org/I4387153674"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Cristina P. Vieira","raw_affiliation_strings":["Instituto de Biologia Molecular e Celular (IBMC) , Rua Alfredo Allen, 208, 4200-135 Porto , Portugal","Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade (i3S) , Universidade do Porto , Rua Alfredo Allen 208, 4200-135 Porto , Portugal"],"raw_orcid":"https://orcid.org/0000-0002-7139-2107","affiliations":[{"raw_affiliation_string":"Instituto de Biologia Molecular e Celular (IBMC) , Rua Alfredo Allen, 208, 4200-135 Porto , Portugal","institution_ids":["https://openalex.org/I4387153674"]},{"raw_affiliation_string":"Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade (i3S) , Universidade do Porto , Rua Alfredo Allen 208, 4200-135 Porto , Portugal","institution_ids":["https://openalex.org/I4210158732"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015978945","display_name":"Jorge Vieira","orcid":"https://orcid.org/0000-0001-7032-5220"},"institutions":[{"id":"https://openalex.org/I4210158732","display_name":"i3S - Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade, Universidade do Porto","ror":"https://ror.org/04wjk1035","country_code":"PT","type":"facility","lineage":["https://openalex.org/I182534213","https://openalex.org/I4210158732"]},{"id":"https://openalex.org/I4387153674","display_name":"Instituto de Biologia Molecular e Celular","ror":"https://ror.org/005dkht93","country_code":null,"type":"education","lineage":["https://openalex.org/I182534213","https://openalex.org/I4387153674"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Jorge Vieira","raw_affiliation_strings":["Instituto de Biologia Molecular e Celular (IBMC) , Rua Alfredo Allen, 208, 4200-135 Porto , Portugal","Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade (i3S) , Universidade do Porto , Rua Alfredo Allen 208, 4200-135 Porto , Portugal"],"raw_orcid":"https://orcid.org/0000-0001-7032-5220","affiliations":[{"raw_affiliation_string":"Instituto de Biologia Molecular e Celular (IBMC) , Rua Alfredo Allen, 208, 4200-135 Porto , Portugal","institution_ids":["https://openalex.org/I4387153674"]},{"raw_affiliation_string":"Instituto de Investiga\u00e7\u00e3o e Inova\u00e7\u00e3o em Sa\u00fade (i3S) , Universidade do Porto , Rua Alfredo Allen 208, 4200-135 Porto , Portugal","institution_ids":["https://openalex.org/I4210158732"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038155908","display_name":"Hugo L\u00f3pez-Fern\u00e1ndez","orcid":"https://orcid.org/0000-0002-6476-7206"},"institutions":[{"id":"https://openalex.org/I4210091478","display_name":"Galicia Sur Biomedical Foundation","ror":"https://ror.org/00jdfsf63","country_code":"ES","type":"other","lineage":["https://openalex.org/I4210091478"]},{"id":"https://openalex.org/I4210158352","display_name":"Servicio Gallego de Salud","ror":"https://ror.org/0591s4t67","country_code":"ES","type":"healthcare","lineage":["https://openalex.org/I4210158352"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Hugo L\u00f3pez-Fern\u00e1ndez","raw_affiliation_strings":["SING Research Group, Galicia Sur Health Research Institute (IIS Galicia Sur), SERGAS-UVIGO , 36213 Vigo , Spain"],"raw_orcid":"https://orcid.org/0000-0002-6476-7206","affiliations":[{"raw_affiliation_string":"SING Research Group, Galicia Sur Health Research Institute (IIS Galicia Sur), SERGAS-UVIGO , 36213 Vigo , Spain","institution_ids":["https://openalex.org/I4210091478","https://openalex.org/I4210158352"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5027776357"],"corresponding_institution_ids":["https://openalex.org/I4210091478","https://openalex.org/I4210158352","https://openalex.org/I4210158732"],"apc_list":null,"apc_paid":null,"fwci":0.6802,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.67413727,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"22","issue":"2","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.8180738687515259},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6424197554588318},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6005045771598816},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5985741019248962},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5641210079193115},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.49172213673591614},{"id":"https://openalex.org/keywords/performance-metric","display_name":"Performance metric","score":0.47847506403923035},{"id":"https://openalex.org/keywords/protein-sequencing","display_name":"Protein sequencing","score":0.4456322491168976},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.41768503189086914},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3600914180278778},{"id":"https://openalex.org/keywords/peptide-sequence","display_name":"Peptide sequence","score":0.12059789896011353},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.11927375197410583},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07135114073753357}],"concepts":[{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.8180738687515259},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6424197554588318},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6005045771598816},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5985741019248962},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5641210079193115},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.49172213673591614},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.47847506403923035},{"id":"https://openalex.org/C10010492","wikidata":"https://www.wikidata.org/wiki/Q3142557","display_name":"Protein sequencing","level":4,"score":0.4456322491168976},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41768503189086914},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3600914180278778},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.12059789896011353},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.11927375197410583},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07135114073753357},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D025941","descriptor_name":"Protein Interaction Mapping","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D025941","descriptor_name":"Protein Interaction Mapping","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D025941","descriptor_name":"Protein Interaction Mapping","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1515/jib-2024-0054","is_oa":true,"landing_page_url":"https://doi.org/10.1515/jib-2024-0054","pdf_url":"https://www.degruyterbrill.com/document/doi/10.1515/jib-2024-0054/pdf","source":{"id":"https://openalex.org/S4393917359","display_name":"Berichte aus der medizinischen Informatik und Bioinformatik/Journal of integrative bioinformatics","issn_l":"1432-4385","issn":["1432-4385","1613-4516"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Integrative Bioinformatics","raw_type":"journal-article"},{"id":"pmid:40165676","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40165676","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of integrative bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:100b0f1d285d4d4d926d928803714eb3","is_oa":true,"landing_page_url":"https://doaj.org/article/100b0f1d285d4d4d926d928803714eb3","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Integrative Bioinformatics, Vol 22, Iss 2, Pp 489-96 (2025)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:11375584","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12569588","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1515/jib-2024-0054","is_oa":true,"landing_page_url":"https://doi.org/10.1515/jib-2024-0054","pdf_url":"https://www.degruyterbrill.com/document/doi/10.1515/jib-2024-0054/pdf","source":{"id":"https://openalex.org/S4393917359","display_name":"Berichte aus der medizinischen Informatik und Bioinformatik/Journal of integrative bioinformatics","issn_l":"1432-4385","issn":["1432-4385","1613-4516"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Integrative Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5299999713897705}],"awards":[{"id":"https://openalex.org/G3969145849","display_name":null,"funder_award_id":"ED481B-2023-005","funder_id":"https://openalex.org/F4320325108","funder_display_name":"Conseller\u00eda de Cultura, Educaci\u00f3n e Ordenaci\u00f3n Universitaria, Xunta de Galicia"},{"id":"https://openalex.org/G4718743385","display_name":null,"funder_award_id":"ED431C 2022/03-GRC","funder_id":"https://openalex.org/F4320325108","funder_display_name":"Conseller\u00eda de Cultura, Educaci\u00f3n e Ordenaci\u00f3n Universitaria, Xunta de Galicia"}],"funders":[{"id":"https://openalex.org/F4320325108","display_name":"Conseller\u00eda de Cultura, Educaci\u00f3n e Ordenaci\u00f3n Universitaria, Xunta de Galicia","ror":"https://ror.org/03gq5rs15"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409037278.pdf","grobid_xml":"https://content.openalex.org/works/W4409037278.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W1926568554","https://openalex.org/W2008840001","https://openalex.org/W2009405916","https://openalex.org/W2012480738","https://openalex.org/W2100025044","https://openalex.org/W2100070946","https://openalex.org/W2120973253","https://openalex.org/W2122863289","https://openalex.org/W2123357765","https://openalex.org/W2131562240","https://openalex.org/W2151412657","https://openalex.org/W2152705149","https://openalex.org/W2163480486","https://openalex.org/W2170747616","https://openalex.org/W2592644437","https://openalex.org/W2616246685","https://openalex.org/W2885583144","https://openalex.org/W2957436444","https://openalex.org/W2995514860","https://openalex.org/W3177500196","https://openalex.org/W4205177317","https://openalex.org/W4255136093","https://openalex.org/W4308834893","https://openalex.org/W4327550249","https://openalex.org/W4383058747","https://openalex.org/W4389742650","https://openalex.org/W4391629225","https://openalex.org/W4392500220","https://openalex.org/W4400984795","https://openalex.org/W4401455507"],"related_works":["https://openalex.org/W4361804730","https://openalex.org/W2142113611","https://openalex.org/W2334467465","https://openalex.org/W2087870008","https://openalex.org/W2162534555","https://openalex.org/W2752178021","https://openalex.org/W2107419853","https://openalex.org/W2143024819","https://openalex.org/W4247159817","https://openalex.org/W2964201926"],"abstract_inverted_index":{"The":[0,95],"characterization":[1],"of":[2,48,187,189,197],"protein-protein":[3],"interactions":[4],"(PPIs)":[5],"is":[6,131,142],"fundamental":[7],"to":[8,25,45,73,110,144,153,173],"understand":[9],"cellular":[10],"functions.":[11],"Although":[12],"machine":[13],"learning":[14],"methods":[15],"in":[16,61,77,80,103,133],"this":[17,40],"task":[18],"have":[19],"historically":[20],"reported":[21],"prediction":[22],"accuracies":[23],"up":[24],"95":[26],"%,":[27],"including":[28],"those":[29,111],"only":[30,180],"using":[31,179],"raw":[32,115],"protein":[33,122,136,191],"sequences,":[34],"it":[35],"has":[36],"been":[37],"highlighted":[38],"that":[39,53],"could":[41,158],"be":[42,159,174],"overestimated":[43],"due":[44],"the":[46,62,78,114,129,140,166,185,195,198],"use":[47,154,196],"random":[49,82,105,155],"splits":[50,85],"and":[51,83,124],"metrics":[52],"do":[54],"not":[55],"take":[56],"into":[57],"account":[58],"potential":[59],"biases":[60],"datasets.":[63],"Here,":[64],"we":[65],"propose":[66],"a":[67,75,99,104,146],"per-protein":[68],"utility":[69],"metric,":[70],"pp_MCC,":[71],"able":[72,143],"show":[74],"drop":[76],"performance":[79,101,149,169],"both":[81],"unseen-protein":[84],"scenarios.":[86],"We":[87],"tested":[88],"ML":[89],"models":[90],"based":[91],"on":[92],"sequence":[93,182],"embeddings.":[94],"pp_MCC":[96,130,199],"metric":[97,117,141],"evidences":[98],"reduced":[100],"even":[102,126],"split,":[106,123],"reaching":[107],"levels":[108],"similar":[109],"shown":[112],"by":[113],"MCC":[116],"computed":[118],"over":[119],"an":[120,134],"unseen":[121,135],"drops":[125],"further":[127],"when":[128,178],"used":[132],"split":[137],"scenario.":[138],"Thus,":[139],"give":[145],"more":[147,162],"realistic":[148],"estimation":[150],"while":[151],"allowing":[152],"splits,":[156],"which":[157],"interesting":[160],"for":[161,176],"protein-centric":[163],"studies.":[164],"Given":[165],"low":[167],"adjusted":[168],"obtained,":[170],"there":[171],"seems":[172],"room":[175],"improvement":[177],"primary":[181],"information,":[183],"suggesting":[184],"need":[186],"inclusion":[188],"complementary":[190],"data,":[192],"accompanied":[193],"with":[194],"metric.":[200]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
