{"id":"https://openalex.org/W4415464836","doi":"https://doi.org/10.1186/s40537-025-01231-5","title":"A novel approach to automating unsupervised estimation of class distribution","display_name":"A novel approach to automating unsupervised estimation of class distribution","publication_year":2025,"publication_date":"2025-10-23","ids":{"openalex":"https://openalex.org/W4415464836","doi":"https://doi.org/10.1186/s40537-025-01231-5"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-025-01231-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01231-5","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-025-01231-5","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-025-01231-5","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011476239","display_name":"Mary Anne Walauskis","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mary Anne Walauskis","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, Florida Atlantic University, Boca Raton, FL, 33431, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, Florida Atlantic University, Boca Raton, FL, 33431, USA","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089170562","display_name":"Taghi M. Khoshgoftaar","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taghi M. Khoshgoftaar","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, Florida Atlantic University, Boca Raton, FL, 33431, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, Florida Atlantic University, Boca Raton, FL, 33431, USA","institution_ids":["https://openalex.org/I63772739"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5011476239"],"corresponding_institution_ids":["https://openalex.org/I63772739"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1635256,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"12","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11871","display_name":"Advanced Statistical Methods and Models","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.8664000034332275},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.6807000041007996},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.5467000007629395},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5008999705314636},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4747999906539917},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.44929999113082886},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4478999972343445},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.43309998512268066}],"concepts":[{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.8664000034332275},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.821399986743927},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7415000200271606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6822999715805054},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.6807000041007996},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.5467000007629395},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5008999705314636},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4747999906539917},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.44929999113082886},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4478999972343445},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.43309998512268066},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4239000082015991},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.39340001344680786},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.351500004529953},{"id":"https://openalex.org/C60777511","wikidata":"https://www.wikidata.org/wiki/Q3045002","display_name":"Concept drift","level":3,"score":0.3237999975681305},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s40537-025-01231-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01231-5","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-025-01231-5","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:af66595738da42af933404a6d6d1d9d1","is_oa":true,"landing_page_url":"https://doaj.org/article/af66595738da42af933404a6d6d1d9d1","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 12, Iss 1, Pp 1-29 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s40537-025-01231-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01231-5","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-025-01231-5","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320310801","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387"},{"id":"https://openalex.org/F4320317380","display_name":"Universidad del Atl\u00e1ntico","ror":"https://ror.org/05mm1w714"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415464836.pdf","grobid_xml":"https://content.openalex.org/works/W4415464836.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W108232788","https://openalex.org/W1964055967","https://openalex.org/W1987669950","https://openalex.org/W2021166720","https://openalex.org/W2118978333","https://openalex.org/W2132870739","https://openalex.org/W2296719434","https://openalex.org/W2564754306","https://openalex.org/W2587441134","https://openalex.org/W2596052762","https://openalex.org/W2767106145","https://openalex.org/W2999309192","https://openalex.org/W3008527144","https://openalex.org/W3033548640","https://openalex.org/W3037381232","https://openalex.org/W4281746271","https://openalex.org/W4288055174","https://openalex.org/W4404492403","https://openalex.org/W4406895100","https://openalex.org/W4408339736","https://openalex.org/W4410398006"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"As":[1],"the":[2,18,36,60,70,81,110,120,144,148],"amount":[3],"of":[4,69,72,119,147,154],"unlabeled":[5,58,106],"data":[6],"has":[7],"continued":[8],"to":[9,14,33,48,164],"grow":[10],"and":[11,75,99,108,117,137,139,161,183,195],"present":[12],"challenges":[13],"machine":[15],"learning":[16],"practitioners,":[17],"need":[19],"for":[20],"unsupervised":[21,30,73,100],"solutions":[22],"is":[23,64],"more":[24],"evident":[25],"than":[26],"ever.":[27],"With":[28],"many":[29],"algorithms":[31,41,74],"available":[32],"classify":[34],"instances,":[35],"challenge":[37],"remains":[38,54],"that":[39,55],"these":[40],"require":[42],"fine-tuning":[43],"and/or":[44],"appropriate":[45,180],"parameter":[46,162],"selection":[47,71],"produce":[49],"reliable":[50],"results.":[51],"The":[52],"difficulty":[53],"given":[56,151],"an":[57],"dataset,":[59,107],"true":[61],"class":[62,111,121,135,149,171],"distribution":[63,112,122,150],"unknown,":[65],"which":[66],"impacts":[67],"appropriateness":[68],"hyperparameter":[76],"tuning,":[77],"as":[78,80,192],"well":[79],"evaluation":[82,181],"metrics":[83],"chosen.":[84],"Our":[85,156],"novel":[86],"approach":[87,157],"addresses":[88],"this":[89,152],"critical":[90],"gap":[91],"in":[92,133,169,175,189],"current":[93],"literature.":[94],"Through":[95],"a":[96,104],"fully":[97],"automated":[98],"framework,":[101],"we":[102],"take":[103],"binary":[105],"return":[109],"without":[113],"prior":[114],"domain":[115],"knowledge":[116],"regardless":[118],"-":[123],"imbalanced":[124,170],"or":[125],"balanced.":[126],"We":[127],"thoroughly":[128],"investigate":[129],"multiple":[130],"datasets":[131],"ranging":[132],"size,":[134],"distribution,":[136],"domain,":[138],"our":[140],"empirical":[141],"evidence":[142],"demonstrates":[143],"successful":[145],"determination":[146],"variety":[153],"factors.":[155],"uses":[158],"data-driven":[159],"threshold":[160],"settings":[163],"improve":[165],"model":[166],"performance,":[167],"particularly":[168],"scenarios.":[172],"This":[173],"helps":[174],"selecting":[176],"suitable":[177],"algorithms,":[178],"guiding":[179],"metrics,":[182],"promoting":[184],"fairer,":[185],"evidence-based":[186],"decision":[187],"making":[188],"fields":[190],"such":[191],"fraud":[193],"detection":[194],"healthcare.":[196]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-23T00:00:00"}
