{"id":"https://openalex.org/W2185628700","doi":"https://doi.org/10.1093/bib/bbv082","title":"Correct machine learning on protein sequences: a peer-reviewing perspective","display_name":"Correct machine learning on protein sequences: a peer-reviewing perspective","publication_year":2015,"publication_date":"2015-09-26","ids":{"openalex":"https://openalex.org/W2185628700","doi":"https://doi.org/10.1093/bib/bbv082","mag":"2185628700","pmid":"https://pubmed.ncbi.nlm.nih.gov/26411473"},"language":"en","primary_location":{"id":"doi:10.1093/bib/bbv082","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbv082","pdf_url":"https://academic.oup.com/bib/article-pdf/17/5/831/17485706/bbv082.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://academic.oup.com/bib/article-pdf/17/5/831/17485706/bbv082.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060625212","display_name":"Ian Walsh","orcid":"https://orcid.org/0000-0003-3994-5522"},"institutions":[{"id":"https://openalex.org/I138689650","display_name":"University of Padua","ror":"https://ror.org/00240q980","country_code":"IT","type":"education","lineage":["https://openalex.org/I138689650"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Ian Walsh","raw_affiliation_strings":["Dept. of Biomedical Sciences, University of Padua, viale G. Colombo 3, 35131 Padova, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Biomedical Sciences, University of Padua, viale G. Colombo 3, 35131 Padova, Italy","institution_ids":["https://openalex.org/I138689650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016630267","display_name":"Gianluca Pollastri","orcid":"https://orcid.org/0000-0002-5825-4949"},"institutions":[{"id":"https://openalex.org/I138689650","display_name":"University of Padua","ror":"https://ror.org/00240q980","country_code":"IT","type":"education","lineage":["https://openalex.org/I138689650"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Gianluca Pollastri","raw_affiliation_strings":["Dept. of Biomedical Sciences, University of Padua, viale G. Colombo 3, 35131 Padova, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Biomedical Sciences, University of Padua, viale G. Colombo 3, 35131 Padova, Italy","institution_ids":["https://openalex.org/I138689650"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066560665","display_name":"Silvio C. E. Tosatto","orcid":"https://orcid.org/0000-0003-4525-7793"},"institutions":[{"id":"https://openalex.org/I138689650","display_name":"University of Padua","ror":"https://ror.org/00240q980","country_code":"IT","type":"education","lineage":["https://openalex.org/I138689650"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Silvio C. E. Tosatto","raw_affiliation_strings":["Dept. of Biomedical Sciences, University of Padua, viale G. Colombo 3, 35131 Padova, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Biomedical Sciences, University of Padua, viale G. Colombo 3, 35131 Padova, Italy","institution_ids":["https://openalex.org/I138689650"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5066560665"],"corresponding_institution_ids":["https://openalex.org/I138689650"],"apc_list":{"value":4011,"currency":"USD","value_usd":4011},"apc_paid":null,"fwci":2.2636,"has_fulltext":true,"cited_by_count":82,"citation_normalized_percentile":{"value":0.88405848,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"17","issue":"5","first_page":"831","last_page":"840"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7969459295272827},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.764039158821106},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6723459362983704},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.6348932981491089},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5423650741577148},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5388990640640259},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.45927128195762634},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.44817084074020386}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7969459295272827},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.764039158821106},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6723459362983704},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.6348932981491089},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5423650741577148},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5388990640640259},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.45927128195762634},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.44817084074020386},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1093/bib/bbv082","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbv082","pdf_url":"https://academic.oup.com/bib/article-pdf/17/5/831/17485706/bbv082.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},{"id":"pmid:26411473","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/26411473","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in bioinformatics","raw_type":null},{"id":"pmh:oai:open-archive.highwire.org:bib:17/5/831","is_oa":false,"landing_page_url":"http://bib.oxfordjournals.org/cgi/content/short/17/5/831","pdf_url":null,"source":{"id":"https://openalex.org/S4406923041","display_name":"HighWire Press Open Archive","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"TEXT"},{"id":"pmh:oai:www.research.unipd.it:11577/3230654","is_oa":false,"landing_page_url":"http://hdl.handle.net/11577/3230654","pdf_url":null,"source":{"id":"https://openalex.org/S4306402547","display_name":"Padua Research Archive (University of Padova)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I138689650","host_organization_name":"University of Padua","host_organization_lineage":["https://openalex.org/I138689650"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1093/bib/bbv082","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbv082","pdf_url":"https://academic.oup.com/bib/article-pdf/17/5/831/17485706/bbv082.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2334119310","display_name":null,"funder_award_id":"GR-2011-02347754","funder_id":"https://openalex.org/F4320321781","funder_display_name":"Ministero della Salute"},{"id":"https://openalex.org/G3526188868","display_name":null,"funder_award_id":"GR-2011-02346845","funder_id":"https://openalex.org/F4320321781","funder_display_name":"Ministero della Salute"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"},{"id":"https://openalex.org/F4320321781","display_name":"Ministero della Salute","ror":"https://ror.org/00789fa95"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2185628700.pdf","grobid_xml":"https://content.openalex.org/works/W2185628700.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W1499450468","https://openalex.org/W1505191356","https://openalex.org/W1508885706","https://openalex.org/W1510073064","https://openalex.org/W1967112996","https://openalex.org/W1969007008","https://openalex.org/W1981552604","https://openalex.org/W1982267716","https://openalex.org/W1996342555","https://openalex.org/W2005129098","https://openalex.org/W2030776726","https://openalex.org/W2047544230","https://openalex.org/W2047750588","https://openalex.org/W2048947602","https://openalex.org/W2072132202","https://openalex.org/W2076586303","https://openalex.org/W2091940856","https://openalex.org/W2092247420","https://openalex.org/W2096929799","https://openalex.org/W2098740506","https://openalex.org/W2109801072","https://openalex.org/W2110149903","https://openalex.org/W2112081648","https://openalex.org/W2117486996","https://openalex.org/W2119387367","https://openalex.org/W2119479037","https://openalex.org/W2119939131","https://openalex.org/W2130161623","https://openalex.org/W2137219016","https://openalex.org/W2137995988","https://openalex.org/W2141007997","https://openalex.org/W2143199763","https://openalex.org/W2147526198","https://openalex.org/W2148633389","https://openalex.org/W2148740464","https://openalex.org/W2153153865","https://openalex.org/W2153187042","https://openalex.org/W2156125289","https://openalex.org/W2157437977","https://openalex.org/W2158698691","https://openalex.org/W2158714788","https://openalex.org/W2164632416","https://openalex.org/W2167277498","https://openalex.org/W2214916291","https://openalex.org/W2605068739","https://openalex.org/W2751318774","https://openalex.org/W2997833137","https://openalex.org/W3104887532","https://openalex.org/W3198350258","https://openalex.org/W4285719527","https://openalex.org/W6688612899"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239","https://openalex.org/W92963860","https://openalex.org/W2041249487"],"abstract_inverted_index":{"Machine":[0,13],"learning":[1,14,56],"methods":[2],"are":[3],"becoming":[4],"increasingly":[5],"popular":[6],"to":[7,32,45,52,62,69,82,100,118],"predict":[8],"protein":[9],"features":[10],"from":[11],"sequences.":[12],"in":[15,123],"bioinformatics":[16],"can":[17],"be":[18,70,101],"powerful":[19],"but":[20],"carries":[21],"also":[22,89],"the":[23,36,75,112,120],"risk":[24],"of":[25,35,43],"introducing":[26],"unexpected":[27],"biases,":[28],"which":[29,67,87],"may":[30],"lead":[31],"an":[33],"overestimation":[34],"performance.":[37],"This":[38],"article":[39],"espouses":[40],"a":[41],"set":[42],"guidelines":[44,114],"allow":[46],"both":[47],"peer":[48],"reviewers":[49],"and":[50,72,77],"authors":[51],"avoid":[53,83],"common":[54],"machine":[55,124],"pitfalls.":[57],"Understanding":[58],"biology":[59],"is":[60,80,88],"necessary":[61],"produce":[63],"useful":[64],"data":[65],"sets,":[66],"have":[68],"large":[71],"diverse.":[73],"Separating":[74],"training":[76],"test":[78],"process":[79],"imperative":[81],"over-selling":[84],"method":[85],"performance,":[86],"dependent":[90],"on":[91],"several":[92,104],"hidden":[93],"parameters.":[94],"A":[95],"novel":[96],"predictor":[97],"has":[98],"always":[99],"compared":[102],"with":[103],"existing":[105],"methods,":[106],"including":[107],"simple":[108],"baseline":[109],"strategies.":[110],"Using":[111],"presented":[113],"will":[115],"help":[116],"nonspecialists":[117],"appreciate":[119],"critical":[121],"issues":[122],"learning.":[125]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
