{"id":"https://openalex.org/W4389205601","doi":"https://doi.org/10.1186/s40537-023-00853-x","title":"The use of class imbalanced learning methods on ULSAM data to predict the case\u2013control status in genome-wide association studies","display_name":"The use of class imbalanced learning methods on ULSAM data to predict the case\u2013control status in genome-wide association studies","publication_year":2023,"publication_date":"2023-11-30","ids":{"openalex":"https://openalex.org/W4389205601","doi":"https://doi.org/10.1186/s40537-023-00853-x"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-023-00853-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-023-00853-x","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-023-00853-x","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-023-00853-x","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019324453","display_name":"Rag\u0131p Onur \u00d6ZTORNACI","orcid":"https://orcid.org/0000-0002-9847-4030"},"institutions":[{"id":"https://openalex.org/I1351752","display_name":"Ko\u00e7 University","ror":"https://ror.org/00jzwgz36","country_code":"TR","type":"education","lineage":["https://openalex.org/I1351752"]},{"id":"https://openalex.org/I51780484","display_name":"Mersin \u00dcniversitesi","ror":"https://ror.org/04nqdwb39","country_code":"TR","type":"education","lineage":["https://openalex.org/I51780484"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"R. Onur \u00d6ztornaci","raw_affiliation_strings":["Faculty of Medicine, Department of Biostatistics and Medical Informatics, Mersin University, Mersin, Turkey","Ko\u00e7 University Research Centre for Translational Medicine, Ko\u00e7 University, Istanbul, Turkey"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Medicine, Department of Biostatistics and Medical Informatics, Mersin University, Mersin, Turkey","institution_ids":["https://openalex.org/I51780484"]},{"raw_affiliation_string":"Ko\u00e7 University Research Centre for Translational Medicine, Ko\u00e7 University, Istanbul, Turkey","institution_ids":["https://openalex.org/I1351752"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011066860","display_name":"Hamzah Syed","orcid":"https://orcid.org/0000-0001-6981-6962"},"institutions":[{"id":"https://openalex.org/I1351752","display_name":"Ko\u00e7 University","ror":"https://ror.org/00jzwgz36","country_code":"TR","type":"education","lineage":["https://openalex.org/I1351752"]},{"id":"https://openalex.org/I2800129641","display_name":"Great Ormond Street Hospital","ror":"https://ror.org/00zn2c847","country_code":"GB","type":"healthcare","lineage":["https://openalex.org/I2800129641","https://openalex.org/I2800349819"]},{"id":"https://openalex.org/I2800349819","display_name":"Great Ormond Street Hospital for Children NHS Foundation Trust","ror":"https://ror.org/03zydm450","country_code":"GB","type":"healthcare","lineage":["https://openalex.org/I2800349819"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB","TR"],"is_corresponding":false,"raw_author_name":"Hamzah Syed","raw_affiliation_strings":["GOSGene, Genetics and Genomic Medicine, UCL GOS Great Ormond Street Institute of Child Health, University College London, London, UK","Great Ormond Street Hospital NHS Foundation Trust and UCL Great Ormond Street Institute of Child Health, NIHR Great Ormond Street Hospital Biomedical Research Centre, London, UK","Ko\u00e7 University Research Centre for Translational Medicine, Ko\u00e7 University, Istanbul, Turkey"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"GOSGene, Genetics and Genomic Medicine, UCL GOS Great Ormond Street Institute of Child Health, University College London, London, UK","institution_ids":["https://openalex.org/I2800129641","https://openalex.org/I45129253"]},{"raw_affiliation_string":"Great Ormond Street Hospital NHS Foundation Trust and UCL Great Ormond Street Institute of Child Health, NIHR Great Ormond Street Hospital Biomedical Research Centre, London, UK","institution_ids":["https://openalex.org/I2800129641","https://openalex.org/I2800349819","https://openalex.org/I45129253"]},{"raw_affiliation_string":"Ko\u00e7 University Research Centre for Translational Medicine, Ko\u00e7 University, Istanbul, Turkey","institution_ids":["https://openalex.org/I1351752"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101573852","display_name":"Andrew P. Morris","orcid":"https://orcid.org/0000-0002-6805-6014"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]},{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrew P. Morris","raw_affiliation_strings":["Department of Health Data Science, University of Liverpool, Liverpool, UK","Division of Musculoskeletal and Dermatological Sciences, University of Manchester, Manchester, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Health Data Science, University of Liverpool, Liverpool, UK","institution_ids":["https://openalex.org/I146655781"]},{"raw_affiliation_string":"Division of Musculoskeletal and Dermatological Sciences, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062900472","display_name":"Bahar Ta\u015fdelen","orcid":"https://orcid.org/0000-0001-8146-4912"},"institutions":[{"id":"https://openalex.org/I51780484","display_name":"Mersin \u00dcniversitesi","ror":"https://ror.org/04nqdwb39","country_code":"TR","type":"education","lineage":["https://openalex.org/I51780484"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Bahar Ta\u015fdelen","raw_affiliation_strings":["Faculty of Medicine, Department of Biostatistics and Medical Informatics, Mersin University, Mersin, Turkey"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Medicine, Department of Biostatistics and Medical Informatics, Mersin University, Mersin, Turkey","institution_ids":["https://openalex.org/I51780484"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":2.0089,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.88209059,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"10","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.8163497447967529},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7395883202552795},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5660248398780823},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5568594932556152},{"id":"https://openalex.org/keywords/multilayer-perceptron","display_name":"Multilayer perceptron","score":0.5458533763885498},{"id":"https://openalex.org/keywords/genome-wide-association-study","display_name":"Genome-wide association study","score":0.5391426682472229},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5261549353599548},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4478614032268524},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3324577212333679},{"id":"https://openalex.org/keywords/single-nucleotide-polymorphism","display_name":"Single-nucleotide polymorphism","score":0.29246270656585693},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.26729631423950195},{"id":"https://openalex.org/keywords/genotype","display_name":"Genotype","score":0.06664294004440308},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.06492292881011963}],"concepts":[{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.8163497447967529},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7395883202552795},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5660248398780823},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5568594932556152},{"id":"https://openalex.org/C179717631","wikidata":"https://www.wikidata.org/wiki/Q2991667","display_name":"Multilayer perceptron","level":3,"score":0.5458533763885498},{"id":"https://openalex.org/C106208931","wikidata":"https://www.wikidata.org/wiki/Q1098876","display_name":"Genome-wide association study","level":5,"score":0.5391426682472229},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5261549353599548},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4478614032268524},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3324577212333679},{"id":"https://openalex.org/C153209595","wikidata":"https://www.wikidata.org/wiki/Q501128","display_name":"Single-nucleotide polymorphism","level":4,"score":0.29246270656585693},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26729631423950195},{"id":"https://openalex.org/C135763542","wikidata":"https://www.wikidata.org/wiki/Q106016","display_name":"Genotype","level":3,"score":0.06664294004440308},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.06492292881011963},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s40537-023-00853-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-023-00853-x","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-023-00853-x","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:4b6d27f55b894c45b3ebdea0af8933f6","is_oa":true,"landing_page_url":"https://doaj.org/article/4b6d27f55b894c45b3ebdea0af8933f6","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 10, Iss 1, Pp 1-28 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s40537-023-00853-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-023-00853-x","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-023-00853-x","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4389205601.pdf"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W1941659294","https://openalex.org/W1970250140","https://openalex.org/W2012908927","https://openalex.org/W2046912033","https://openalex.org/W2058338438","https://openalex.org/W2070659891","https://openalex.org/W2078885513","https://openalex.org/W2091374137","https://openalex.org/W2093213907","https://openalex.org/W2095727900","https://openalex.org/W2108728387","https://openalex.org/W2115358726","https://openalex.org/W2118685291","https://openalex.org/W2136290838","https://openalex.org/W2137880226","https://openalex.org/W2143426320","https://openalex.org/W2147606185","https://openalex.org/W2148143831","https://openalex.org/W2153491803","https://openalex.org/W2155632266","https://openalex.org/W2161633633","https://openalex.org/W2166207903","https://openalex.org/W2226787034","https://openalex.org/W2270323152","https://openalex.org/W2519584884","https://openalex.org/W2558493806","https://openalex.org/W2582043155","https://openalex.org/W2588050043","https://openalex.org/W2610257065","https://openalex.org/W2620563544","https://openalex.org/W2889664156","https://openalex.org/W2899365137","https://openalex.org/W2911964244","https://openalex.org/W2950099124","https://openalex.org/W2950722229","https://openalex.org/W2964136462","https://openalex.org/W2991034145","https://openalex.org/W2995918088","https://openalex.org/W2999309192","https://openalex.org/W3007615437","https://openalex.org/W3022437778","https://openalex.org/W3037682562","https://openalex.org/W3103324688","https://openalex.org/W3136951845","https://openalex.org/W3154586314","https://openalex.org/W3160046514","https://openalex.org/W3160570518","https://openalex.org/W3165582150","https://openalex.org/W3208674021","https://openalex.org/W3213788487","https://openalex.org/W4211253314","https://openalex.org/W4247993926","https://openalex.org/W4296966925","https://openalex.org/W4391233682"],"related_works":["https://openalex.org/W4386259002","https://openalex.org/W1546989560","https://openalex.org/W3193043704","https://openalex.org/W3171520305","https://openalex.org/W4200112873","https://openalex.org/W2955796858","https://openalex.org/W4224941037","https://openalex.org/W2004826645","https://openalex.org/W3135818052","https://openalex.org/W4280611221"],"abstract_inverted_index":{"Abstract":[0],"Machine":[1],"learning":[2],"(ML)":[3],"methods":[4,185],"for":[5,45],"uncovering":[6],"single":[7],"nucleotide":[8],"polymorphisms":[9],"(SNPs)":[10],"in":[11,29],"genome-wide":[12],"association":[13],"study":[14],"(GWAS)":[15],"data":[16,49,51,89],"that":[17,62],"can":[18],"be":[19],"used":[20,28,186],"to":[21,59,112,196],"predict":[22,63],"disease":[23],"outcomes":[24],"are":[25,40],"becoming":[26],"increasingly":[27],"genetic":[30],"research.":[31],"Two":[32],"issues":[33],"with":[34,47],"the":[35,42,70,100,116,151],"use":[36],"of":[37,104,150,167,181],"ML":[38,57,132,192],"models":[39,58],"finding":[41],"correct":[43],"method":[44],"dealing":[46],"imbalanced":[48],"and":[50,96,121,143,169,175],"training.":[52],"This":[53],"article":[54],"compares":[55],"three":[56,130,158,191],"identify":[60],"SNPs":[61,113],"type":[64],"2":[65],"diabetes":[66],"(T2D)":[67],"status":[68],"using":[69,129],"Support":[71],"vector":[72,136],"machine":[73,137],"SMOTE":[74,170],"(SVM":[75],"SMOTE),":[76],"The":[77,125,148,160],"Adaptive":[78],"Synthetic":[79],"Sampling":[80],"Approach":[81],"(ADASYN),":[82],"Random":[83],"under":[84],"sampling":[85],"(RUS)":[86],"on":[87],"GWAS":[88],"from":[90,99],"elderly":[91],"male":[92],"participants":[93],"(165":[94],"cases":[95],"951":[97],"controls)":[98],"Uppsala":[101],"Longitudinal":[102],"Study":[103],"Adult":[105],"Men":[106],"(ULSAM).":[107],"It":[108],"was":[109,127,154,164],"also":[110],"applied":[111],"selected":[114],"by":[115],"SMOTE,":[117,119],"SVM":[118,176],"ADASYN,":[120],"RUS":[122],"clumping":[123],"method.":[124],"analysis":[126],"performed":[128],"different":[131],"models:":[133],"(i)":[134],"support":[135],"(SVM),":[138],"(ii)":[139],"multilayer":[140],"perceptron":[141],"(MLP)":[142],"(iii)":[144],"random":[145],"forests":[146],"(RF).":[147],"accuracy":[149,179],"case\u2013control":[152],"classification":[153,162],"compared":[155],"between":[156],"these":[157],"methods.":[159],"best":[161],"algorithm":[163],"a":[165],"combination":[166],"MLP":[168],"(97%":[171],"accuracy).":[172],"Both":[173],"RF":[174],"achieved":[177],"good":[178],"results":[180],"over":[182],"90%.":[183],"Overall,":[184],"against":[187],"unbalanced":[188],"data,":[189],"all":[190],"algorithms":[193],"were":[194],"found":[195],"improve":[197],"prediction":[198],"accuracy.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
