{"id":"https://openalex.org/W2936599225","doi":"https://doi.org/10.1093/bib/bby126","title":"Investigating the role of Simpson\u2019s paradox in the analysis of top-ranked features in high-dimensional bioinformatics datasets","display_name":"Investigating the role of Simpson\u2019s paradox in the analysis of top-ranked features in high-dimensional bioinformatics datasets","publication_year":2018,"publication_date":"2018-12-07","ids":{"openalex":"https://openalex.org/W2936599225","doi":"https://doi.org/10.1093/bib/bby126","mag":"2936599225","pmid":"https://pubmed.ncbi.nlm.nih.gov/30629111"},"language":"en","primary_location":{"id":"doi:10.1093/bib/bby126","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bib/bby126","pdf_url":null,"source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087201377","display_name":"Alex A. Freitas","orcid":"https://orcid.org/0000-0001-9825-4700"},"institutions":[{"id":"https://openalex.org/I20581793","display_name":"University of Kent","ror":"https://ror.org/00xkeyj56","country_code":"GB","type":"education","lineage":["https://openalex.org/I20581793"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Alex A Freitas","raw_affiliation_strings":["University of Kent, Kent, UK"],"affiliations":[{"raw_affiliation_string":"University of Kent, Kent, UK","institution_ids":["https://openalex.org/I20581793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5087201377"],"corresponding_institution_ids":["https://openalex.org/I20581793"],"apc_list":{"value":4011,"currency":"USD","value_usd":4011},"apc_paid":null,"fwci":0.3724,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.6178109,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"21","issue":"2","first_page":"421","last_page":"428"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.7359916567802429},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5738064646720886},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.5485849380493164},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5212893486022949},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.518738329410553},{"id":"https://openalex.org/keywords/gene-ontology","display_name":"Gene ontology","score":0.5170667767524719},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5139619708061218},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.46919748187065125},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.45686575770378113},{"id":"https://openalex.org/keywords/high-dimensional","display_name":"High dimensional","score":0.44116485118865967},{"id":"https://openalex.org/keywords/confounding","display_name":"Confounding","score":0.4318335950374603},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.42234712839126587},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34355077147483826},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2691514790058136},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.19033440947532654},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.14523714780807495},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.13750192523002625}],"concepts":[{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.7359916567802429},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5738064646720886},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.5485849380493164},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5212893486022949},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.518738329410553},{"id":"https://openalex.org/C2987395477","wikidata":"https://www.wikidata.org/wiki/Q135085","display_name":"Gene ontology","level":4,"score":0.5170667767524719},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5139619708061218},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.46919748187065125},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.45686575770378113},{"id":"https://openalex.org/C3019722297","wikidata":"https://www.wikidata.org/wiki/Q4440864","display_name":"High dimensional","level":2,"score":0.44116485118865967},{"id":"https://openalex.org/C77350462","wikidata":"https://www.wikidata.org/wiki/Q1125472","display_name":"Confounding","level":2,"score":0.4318335950374603},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.42234712839126587},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34355077147483826},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2691514790058136},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.19033440947532654},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.14523714780807495},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.13750192523002625},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D066264","descriptor_name":"Datasets as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D066264","descriptor_name":"Datasets as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D066264","descriptor_name":"Datasets as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1093/bib/bby126","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bib/bby126","pdf_url":null,"source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},{"id":"pmid:30629111","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/30629111","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in bioinformatics","raw_type":null},{"id":"pmh:oai:kar.kent.ac.uk:72582","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bib%2Fbby126>)","pdf_url":null,"source":{"id":"https://openalex.org/S4377196264","display_name":"Kent Academic Repository (University of Kent)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I20581793","host_organization_name":"University of Kent","host_organization_lineage":["https://openalex.org/I20581793"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W284239745","https://openalex.org/W1493357981","https://openalex.org/W1500895378","https://openalex.org/W1505191356","https://openalex.org/W1545302199","https://openalex.org/W1851748800","https://openalex.org/W1922017469","https://openalex.org/W1994166779","https://openalex.org/W2035841117","https://openalex.org/W2049137946","https://openalex.org/W2054673300","https://openalex.org/W2069645861","https://openalex.org/W2103073877","https://openalex.org/W2119387367","https://openalex.org/W2125055259","https://openalex.org/W2143700987","https://openalex.org/W2164411392","https://openalex.org/W2260771218","https://openalex.org/W2436695585","https://openalex.org/W2516938563","https://openalex.org/W2581109417","https://openalex.org/W2740196207","https://openalex.org/W2780269456","https://openalex.org/W2792808664","https://openalex.org/W2800123026","https://openalex.org/W2805310212","https://openalex.org/W2953367535","https://openalex.org/W3097993951","https://openalex.org/W3133236490","https://openalex.org/W3152507489","https://openalex.org/W4205699531","https://openalex.org/W6601506053","https://openalex.org/W6629638915","https://openalex.org/W6692628530","https://openalex.org/W6717793358","https://openalex.org/W6763809837"],"related_works":["https://openalex.org/W2994176440","https://openalex.org/W2510575233","https://openalex.org/W4249305026","https://openalex.org/W4200276825","https://openalex.org/W4387870091","https://openalex.org/W4283773708","https://openalex.org/W2885518121","https://openalex.org/W2145057176","https://openalex.org/W2745463053","https://openalex.org/W2963977223"],"abstract_inverted_index":{"An":[0],"important":[1,10,64],"problem":[2,26],"in":[3,20,58,75,120,126,130],"bioinformatics":[4,128],"consists":[5],"of":[6,18,43,56,78,100,103,117,123,137,167,182,194],"identifying":[7],"the":[8,47,68,71,82,101,115,121,134,145,159,171,195],"most":[9,48],"features":[11,19,50],"(or":[12],"predictors),":[13],"among":[14],"a":[15,21,32,40,88,91,104,142],"large":[16,54],"number":[17,55],"given":[22],"classification":[23],"dataset.":[24],"This":[25],"is":[27],"often":[28],"addressed":[29],"by":[30],"using":[31,152],"machine":[33,160],"learning-based":[34],"feature":[35,155,196],"ranking":[36,156,197],"method":[37],"to":[38,132],"identify":[39],"small":[41],"set":[42],"top-ranked":[44,72,124,186],"predictors":[45,73,125,172,187],"(i.e.":[46],"relevant":[49],"for":[51,192],"classification).":[52],"The":[53,177],"studies":[57],"this":[59,109],"area":[60],"has,":[61],"however,":[62],"an":[63,76,139],"limitation:":[65],"they":[66],"ignore":[67],"possibility":[69],"that":[70,180],"occur":[74],"instance":[77],"Simpson's":[79,118,183],"paradox,":[80],"where":[81,170],"positive":[83],"or":[84],"negative":[85],"association":[86,140],"between":[87,141],"predictor":[89,143],"and":[90,113,144,163],"class":[92,146],"variable":[93],"reverses":[94],"sign":[95],"upon":[96],"conditional":[97],"on":[98],"each":[99],"values":[102],"third":[105],"(confounder)":[106],"variable.":[107,147],"In":[108],"work,":[110],"we":[111],"review":[112],"investigate":[114],"role":[116],"paradox":[119,184],"analysis":[122],"high-dimensional":[127,165],"datasets,":[129],"order":[131],"avoid":[133],"potential":[135],"danger":[136],"misinterpreting":[138],"We":[148],"perform":[149],"computational":[150],"experiments":[151],"four":[153],"well-known":[154],"methods":[157],"from":[158],"learning":[161],"field":[162],"five":[164],"datasets":[166],"ageing-related":[168],"genes,":[169],"are":[173,188],"Gene":[174],"Ontology":[175],"terms.":[176],"results":[178],"show":[179],"occurrences":[181],"involving":[185],"much":[189],"more":[190],"common":[191],"one":[193],"methods.":[198]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
