{"id":"https://openalex.org/W4402241378","doi":"https://doi.org/10.1186/s13040-024-00384-y","title":"Processing imbalanced medical data at the data level with assisted-reproduction data as an example","display_name":"Processing imbalanced medical data at the data level with assisted-reproduction data as an example","publication_year":2024,"publication_date":"2024-09-04","ids":{"openalex":"https://openalex.org/W4402241378","doi":"https://doi.org/10.1186/s13040-024-00384-y","pmid":"https://pubmed.ncbi.nlm.nih.gov/39232851"},"language":"en","primary_location":{"id":"doi:10.1186/s13040-024-00384-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-024-00384-y","pdf_url":"https://link.springer.com/content/pdf/10.1186/s13040-024-00384-y.pdf","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1186/s13040-024-00384-y.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111315619","display_name":"Junliang Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I91656880","display_name":"China Medical University","ror":"https://ror.org/032d4f246","country_code":"CN","type":"education","lineage":["https://openalex.org/I91656880"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junliang Zhu","raw_affiliation_strings":["Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China","institution_ids":["https://openalex.org/I91656880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109776501","display_name":"Shaowei Pu","orcid":null},"institutions":[{"id":"https://openalex.org/I91656880","display_name":"China Medical University","ror":"https://ror.org/032d4f246","country_code":"CN","type":"education","lineage":["https://openalex.org/I91656880"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaowei Pu","raw_affiliation_strings":["Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China","institution_ids":["https://openalex.org/I91656880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103054680","display_name":"Jiaji He","orcid":"https://orcid.org/0000-0001-7226-3326"},"institutions":[{"id":"https://openalex.org/I91656880","display_name":"China Medical University","ror":"https://ror.org/032d4f246","country_code":"CN","type":"education","lineage":["https://openalex.org/I91656880"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaji He","raw_affiliation_strings":["Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China","institution_ids":["https://openalex.org/I91656880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107002971","display_name":"Dongchao Su","orcid":null},"institutions":[{"id":"https://openalex.org/I91656880","display_name":"China Medical University","ror":"https://ror.org/032d4f246","country_code":"CN","type":"education","lineage":["https://openalex.org/I91656880"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongchao Su","raw_affiliation_strings":["Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China","institution_ids":["https://openalex.org/I91656880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108088640","display_name":"Weijie Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I91656880","display_name":"China Medical University","ror":"https://ror.org/032d4f246","country_code":"CN","type":"education","lineage":["https://openalex.org/I91656880"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weijie Cai","raw_affiliation_strings":["Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China","institution_ids":["https://openalex.org/I91656880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106995137","display_name":"Xueying Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I91656880","display_name":"China Medical University","ror":"https://ror.org/032d4f246","country_code":"CN","type":"education","lineage":["https://openalex.org/I91656880"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueying Xu","raw_affiliation_strings":["Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China","institution_ids":["https://openalex.org/I91656880"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101447556","display_name":"Hongbo Liu","orcid":"https://orcid.org/0009-0000-7231-3653"},"institutions":[{"id":"https://openalex.org/I4210093430","display_name":"Shenyang Center for Disease Control and Prevention","ror":"https://ror.org/005mgvs97","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210093430"]},{"id":"https://openalex.org/I91656880","display_name":"China Medical University","ror":"https://ror.org/032d4f246","country_code":"CN","type":"education","lineage":["https://openalex.org/I91656880"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongbo Liu","raw_affiliation_strings":["Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China. hbliu@cmu.edu.cn","Key Lab of Environmental Stress and Chronic Disease Control & Prevention, China Medical University, No.77 Puhe Road, Shenyang North New Area, Shenyang, 110122, Liaoning Province, PR China. hbliu@cmu.edu.cn"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Health Statistics, School of Public Health, China Medical University, Shenyang, 110122, PR China. hbliu@cmu.edu.cn","institution_ids":["https://openalex.org/I91656880"]},{"raw_affiliation_string":"Key Lab of Environmental Stress and Chronic Disease Control & Prevention, China Medical University, No.77 Puhe Road, Shenyang North New Area, Shenyang, 110122, Liaoning Province, PR China. hbliu@cmu.edu.cn","institution_ids":["https://openalex.org/I91656880","https://openalex.org/I4210093430"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5111315619"],"corresponding_institution_ids":["https://openalex.org/I91656880"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":6.2633,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.96944584,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"17","issue":"1","first_page":"29","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.90829998254776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.90829998254776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.01119999960064888,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.010200000368058681,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6649857759475708},{"id":"https://openalex.org/keywords/reproduction","display_name":"Reproduction","score":0.5770092010498047},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4915343225002289},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3830519914627075},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.09662622213363647},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.0838279128074646}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6649857759475708},{"id":"https://openalex.org/C59659247","wikidata":"https://www.wikidata.org/wiki/Q11990","display_name":"Reproduction","level":2,"score":0.5770092010498047},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4915343225002289},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3830519914627075},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.09662622213363647},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0838279128074646}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13040-024-00384-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-024-00384-y","pdf_url":"https://link.springer.com/content/pdf/10.1186/s13040-024-00384-y.pdf","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},{"id":"pmid:39232851","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39232851","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData mining","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:11373105","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11373105","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11373105/pdf/13040_2024_Article_384.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BioData Min","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:b867b443611e49c8911792fc09d86987","is_oa":true,"landing_page_url":"https://doaj.org/article/b867b443611e49c8911792fc09d86987","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BioData Mining, Vol 17, Iss 1, Pp 1-17 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13040-024-00384-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-024-00384-y","pdf_url":"https://link.springer.com/content/pdf/10.1186/s13040-024-00384-y.pdf","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402241378.pdf","grobid_xml":"https://content.openalex.org/works/W4402241378.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W1941659294","https://openalex.org/W1993220166","https://openalex.org/W2050420817","https://openalex.org/W2098254218","https://openalex.org/W2109509105","https://openalex.org/W2241877823","https://openalex.org/W2334028018","https://openalex.org/W2534295610","https://openalex.org/W2557684744","https://openalex.org/W2585770658","https://openalex.org/W2606288994","https://openalex.org/W2623340684","https://openalex.org/W2789894922","https://openalex.org/W2908465383","https://openalex.org/W3001625252","https://openalex.org/W3013063100","https://openalex.org/W3022206528","https://openalex.org/W3026405303","https://openalex.org/W3035102496","https://openalex.org/W3039142166","https://openalex.org/W3039681214","https://openalex.org/W3146861394","https://openalex.org/W3156338075","https://openalex.org/W3157699413","https://openalex.org/W3167186267","https://openalex.org/W3176376866","https://openalex.org/W3199751881","https://openalex.org/W3216794305","https://openalex.org/W4200472276","https://openalex.org/W4212831606","https://openalex.org/W4225553698","https://openalex.org/W4225632040","https://openalex.org/W4226372772","https://openalex.org/W4246447493","https://openalex.org/W4282832385","https://openalex.org/W4312201171","https://openalex.org/W4319996939","https://openalex.org/W4362571807","https://openalex.org/W4366992038","https://openalex.org/W4378195073","https://openalex.org/W4378417908","https://openalex.org/W4388626511","https://openalex.org/W4390693004","https://openalex.org/W4391876345","https://openalex.org/W4392946440","https://openalex.org/W4396584743"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"OBJECTIVE:":[0],"Data":[1],"imbalance":[2,36,48,112,143],"is":[3],"a":[4,91,240,246],"pervasive":[5],"issue":[6],"in":[7,71,90,226],"medical":[8,82],"data":[9,35],"mining,":[10],"often":[11],"leading":[12],"to":[13,22,30,67,99,119,153,163,274],"biased":[14],"and":[15,50,60,74,114,136,149,159,208,215,219,232,245,265,270,277],"unreliable":[16],"predictive":[17],"models.":[18,39,127],"This":[19],"study":[20,238],"aims":[21],"address":[23],"the":[24,32,44,62,101,105,121,175,202],"urgent":[25],"need":[26],"for":[27,104,140,205,254],"effective":[28],"strategies":[29],"mitigate":[31],"impact":[33],"of":[34,46,64,84,124,243,249],"on":[37,42,53],"classification":[38,122,224],"We":[40,80],"focus":[41],"quantifying":[43],"effects":[45],"different":[47,111],"degrees":[49,113],"sample":[51,76,115,161,187,209,234,247,267],"sizes":[52,116,162,188],"model":[54,69,257,278],"performance,":[55],"identifying":[56],"optimal":[57,203,252],"cut-off":[58],"values,":[59],"evaluating":[61],"efficacy":[63],"various":[65],"methods":[66,145],"enhance":[68],"accuracy":[70],"highly":[72],"imbalanced":[73],"small":[75,160,233,266],"size":[77,210,248],"scenarios.":[78],"METHODS:":[79],"collected":[81],"records":[83],"patients":[85],"receiving":[86],"assisted":[87],"reproductive":[88,92],"treatment":[89,144],"medicine":[93],"center.":[94],"Random":[95],"forest":[96],"was":[97,172,178],"used":[98,139],"screen":[100],"key":[102],"variables":[103],"prediction":[106],"target.":[107],"Various":[108],"datasets":[109,154,227,260],"with":[110,155,194,228,261],"were":[117,138,151,211],"constructed":[118],"compare":[120],"performance":[123,171,225],"logistic":[125,169,256],"regression":[126],"Metrics":[128],"such":[129],"as":[130,213,251],"AUC,":[131],"G-mean,":[132],"F1-Score,":[133],"Accuracy,":[134],"Recall,":[135],"Precision":[137],"evaluation.":[141],"Four":[142],"(SMOTE,":[146],"ADASYN,":[147],"OSS,":[148],"CNN)":[150],"applied":[152],"low":[156,173,229,262],"positive":[157,176,206,230,241,263],"rates":[158,231,264],"assess":[164],"their":[165],"effectiveness.":[166],"RESULTS:":[167],"The":[168,237],"model's":[170],"when":[174],"rate":[177,207,242],"below":[179,189],"10%":[180],"but":[181],"stabilized":[182],"beyond":[183],"this":[184,198],"threshold.":[185,199],"Similarly,":[186],"1200":[190],"yielded":[191],"poor":[192],"results,":[193],"improvement":[195],"seen":[196],"above":[197],"For":[200,259],"robustness,":[201],"cut-offs":[204,253],"identified":[212],"15%":[214,244],"1500,":[216],"respectively.":[217],"SMOTE":[218,269],"ADASYN":[220,271],"oversampling":[221],"significantly":[222],"improved":[223],"sizes.":[235],"CONCLUSIONS:":[236],"identifies":[239],"1500":[250],"stable":[255],"performance.":[258],"sizes,":[268],"are":[272],"recommended":[273],"improve":[275],"balance":[276],"accuracy.":[279]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":13}],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
