{"id":"https://openalex.org/W4205328831","doi":"https://doi.org/10.1109/bibm52615.2021.9669515","title":"Hierarchical Clustering Split for Low-Bias Evaluation of Drug-Target Interaction Prediction","display_name":"Hierarchical Clustering Split for Low-Bias Evaluation of Drug-Target Interaction Prediction","publication_year":2021,"publication_date":"2021-12-09","ids":{"openalex":"https://openalex.org/W4205328831","doi":"https://doi.org/10.1109/bibm52615.2021.9669515"},"language":"en","primary_location":{"id":"doi:10.1109/bibm52615.2021.9669515","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm52615.2021.9669515","pdf_url":null,"source":{"id":"https://openalex.org/S4363607735","display_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027847171","display_name":"Peizhen Bai","orcid":"https://orcid.org/0000-0003-3027-5518"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Peizhen Bai","raw_affiliation_strings":["Department of Computer Science, University of Sheffield, Sheffield, UK"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Sheffield, Sheffield, UK","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089864495","display_name":"Filip Miljkovi\u0107","orcid":"https://orcid.org/0000-0001-5365-505X"},"institutions":[{"id":"https://openalex.org/I4210143795","display_name":"AstraZeneca (Sweden)","ror":"https://ror.org/04wwrrg31","country_code":"SE","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210143795"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Filip Miljkovic","raw_affiliation_strings":["Imaging and Data Analytics, Clinical Pharmacology & Safety Sciences, R&D, AstraZeneca, Gothenburg, Sweden"],"affiliations":[{"raw_affiliation_string":"Imaging and Data Analytics, Clinical Pharmacology & Safety Sciences, R&D, AstraZeneca, Gothenburg, Sweden","institution_ids":["https://openalex.org/I4210143795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002566778","display_name":"Yan Ge","orcid":"https://orcid.org/0000-0002-4701-6144"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yan Ge","raw_affiliation_strings":["Department of Computer Science, University of Sheffield, Sheffield, UK"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Sheffield, Sheffield, UK","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031183535","display_name":"Nigel Greene","orcid":"https://orcid.org/0000-0003-0433-4596"},"institutions":[{"id":"https://openalex.org/I4210150756","display_name":"AstraZeneca (United States)","ror":"https://ror.org/043cec594","country_code":"US","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210150756"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nigel Greene","raw_affiliation_strings":["Imaging and Data Analytics, Clinical Pharmacology & Safety Sciences, R&D, AstraZeneca, Waltham, USA"],"affiliations":[{"raw_affiliation_string":"Imaging and Data Analytics, Clinical Pharmacology & Safety Sciences, R&D, AstraZeneca, Waltham, USA","institution_ids":["https://openalex.org/I4210150756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113997880","display_name":"Bino John","orcid":null},"institutions":[{"id":"https://openalex.org/I4210150756","display_name":"AstraZeneca (United States)","ror":"https://ror.org/043cec594","country_code":"US","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210150756"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bino John","raw_affiliation_strings":["Imaging and Data Analytics, Clinical Pharmacology & Safety Sciences, R&D, AstraZeneca, Waltham, USA"],"affiliations":[{"raw_affiliation_string":"Imaging and Data Analytics, Clinical Pharmacology & Safety Sciences, R&D, AstraZeneca, Waltham, USA","institution_ids":["https://openalex.org/I4210150756"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009807039","display_name":"Haiping Lu","orcid":"https://orcid.org/0000-0002-0349-2181"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Haiping Lu","raw_affiliation_strings":["Department of Computer Science, University of Sheffield, Sheffield, UK"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Sheffield, Sheffield, UK","institution_ids":["https://openalex.org/I91136226"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5027847171"],"corresponding_institution_ids":["https://openalex.org/I91136226"],"apc_list":null,"apc_paid":null,"fwci":5.3186,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.96885813,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"641","last_page":"644"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.7824664115905762},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7446069717407227},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6826441884040833},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6734486222267151},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6588821411132812},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5373422503471375},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.46359744668006897},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.43470150232315063},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09271159768104553},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08013090491294861}],"concepts":[{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.7824664115905762},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7446069717407227},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6826441884040833},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6734486222267151},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6588821411132812},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5373422503471375},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.46359744668006897},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.43470150232315063},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09271159768104553},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08013090491294861}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bibm52615.2021.9669515","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm52615.2021.9669515","pdf_url":null,"source":{"id":"https://openalex.org/S4363607735","display_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.whiterose.ac.uk:181814","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400854","display_name":"White Rose Research Online (University of Leeds, The University of Sheffield, University of York)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2800616092","host_organization_name":"White Rose University Consortium","host_organization_lineage":["https://openalex.org/I2800616092"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Proceedings Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1988037271","https://openalex.org/W2080642200","https://openalex.org/W2130942839","https://openalex.org/W2148145769","https://openalex.org/W2204695023","https://openalex.org/W2601243251","https://openalex.org/W2767891136","https://openalex.org/W2785947426","https://openalex.org/W2803094965","https://openalex.org/W2806547269","https://openalex.org/W2807792492","https://openalex.org/W2809216727","https://openalex.org/W2899070097","https://openalex.org/W2899788782","https://openalex.org/W2966848807","https://openalex.org/W3018980093","https://openalex.org/W3028589594","https://openalex.org/W3104508774","https://openalex.org/W3181995918","https://openalex.org/W6679436768","https://openalex.org/W6798300011"],"related_works":["https://openalex.org/W2118717649","https://openalex.org/W410723623","https://openalex.org/W2413243053","https://openalex.org/W2015341305","https://openalex.org/W4225593417","https://openalex.org/W2035068594","https://openalex.org/W2059783833","https://openalex.org/W2573498121","https://openalex.org/W3160494304","https://openalex.org/W4380075502"],"abstract_inverted_index":{"Drug-target":[0],"interaction":[1],"(DTI)":[2],"prediction":[3,58,118,184],"is":[4,66],"important":[5],"in":[6,35,40,55,60,73,109,181],"drug":[7,42,135],"discovery":[8,43],"and":[9,38,76,89,115,139,162,171],"chemogenomics":[10],"studies.":[11],"Machine":[12],"learning,":[13,16],"particularly":[14],"deep":[15,123],"has":[17],"advanced":[18],"this":[19,81],"area":[20],"significantly":[21],"over":[22],"the":[23,32,46,57,106,117,155,158],"past":[24],"few":[25],"years.":[26],"However,":[27],"a":[28,85,110],"significant":[29],"gap":[30,65],"between":[31],"performance":[33,59,64,98,119,149],"reported":[34],"academic":[36],"papers":[37],"that":[39,164],"practical":[41],"settings,":[44],"e.g.":[45],"random-split-based":[47],"evaluation":[48,99],"strategy":[49],"tends":[50],"to":[51,69,96],"be":[52],"too":[53],"optimistic":[54],"estimating":[56],"real-world":[61,101,182],"settings.":[62,102,185],"Such":[63],"largely":[67],"due":[68],"hidden":[70],"data":[71,78,93,107,129],"bias":[72,108],"experimental":[74,152],"datasets":[75],"inappropriate":[77],"split.":[79],"In":[80,143],"paper,":[82],"we":[83,104,145],"construct":[84],"low-bias":[86],"DTI":[87,112,183],"dataset":[88],"study":[90,105],"more":[91,169,175],"challenging":[92,170],"split":[94,130,161],"strategies":[95],"improve":[97],"for":[100],"Specifically,":[103],"popular":[111,159],"dataset,":[113],"BindingDB,":[114],"re-evaluate":[116],"of":[120,157,178],"three":[121],"state-of-the-art":[122],"learning":[124],"models":[125],"using":[126],"five":[127],"different":[128],"strategies:":[131],"random":[132,160],"split,":[133,136,138],"cold":[134],"scaffold":[137],"two":[140],"hierarchical-clustering-based":[141,165],"splits.":[142],"addition,":[144],"comprehensively":[146],"examine":[147],"six":[148],"metrics.":[150],"Our":[151],"results":[153],"confirm":[154],"overoptimism":[156],"show":[163],"splits":[166],"are":[167],"far":[168],"can":[172],"provide":[173],"potentially":[174],"useful":[176],"assessment":[177],"model":[179],"generalizability":[180]},"counts_by_year":[{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":5}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
