{"id":"https://openalex.org/W4312256415","doi":"https://doi.org/10.1109/case49997.2022.9926548","title":"GUM: A Guided Undersampling Method to Preprocess Imbalanced Datasets for Classification","display_name":"GUM: A Guided Undersampling Method to Preprocess Imbalanced Datasets for Classification","publication_year":2022,"publication_date":"2022-08-20","ids":{"openalex":"https://openalex.org/W4312256415","doi":"https://doi.org/10.1109/case49997.2022.9926548"},"language":"en","primary_location":{"id":"doi:10.1109/case49997.2022.9926548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/case49997.2022.9926548","pdf_url":null,"source":{"id":"https://openalex.org/S4363607892","display_name":"2022 IEEE 18th International Conference on Automation Science and Engineering (CASE)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 18th International Conference on Automation Science and Engineering (CASE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066723368","display_name":"Kisuk Sung","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Kisuk Sung","raw_affiliation_strings":["Samsung Life Insurance,Seoul,Korea,06620"],"affiliations":[{"raw_affiliation_string":"Samsung Life Insurance,Seoul,Korea,06620","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026332271","display_name":"W. Eric Brown","orcid":null},"institutions":[{"id":"https://openalex.org/I12315562","display_name":"Texas Tech University","ror":"https://ror.org/0405mnx93","country_code":"US","type":"education","lineage":["https://openalex.org/I12315562"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"W. Eric Brown","raw_affiliation_strings":["Texas Tech University,Rawls College of Business","Rawls College of Business, Texas Tech University"],"affiliations":[{"raw_affiliation_string":"Texas Tech University,Rawls College of Business","institution_ids":["https://openalex.org/I12315562"]},{"raw_affiliation_string":"Rawls College of Business, Texas Tech University","institution_ids":["https://openalex.org/I12315562"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065668041","display_name":"Erick Moreno\u2010Centeno","orcid":"https://orcid.org/0000-0001-6258-5428"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Erick Moreno-Centeno","raw_affiliation_strings":["Texas A&#x0026;M University,Wm Michael Barnes&#x2019;64 Department of Industrial and Systems Engineering,College Station,Texas,USA"],"affiliations":[{"raw_affiliation_string":"Texas A&#x0026;M University,Wm Michael Barnes&#x2019;64 Department of Industrial and Systems Engineering,College Station,Texas,USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066523990","display_name":"Yu Ding","orcid":"https://orcid.org/0000-0001-6936-074X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu Ding","raw_affiliation_strings":["Texas A&#x0026;M University,Wm Michael Barnes&#x2019;64 Department of Industrial and Systems Engineering,College Station,Texas,USA"],"affiliations":[{"raw_affiliation_string":"Texas A&#x0026;M University,Wm Michael Barnes&#x2019;64 Department of Industrial and Systems Engineering,College Station,Texas,USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5066723368"],"corresponding_institution_ids":["https://openalex.org/I2250650973"],"apc_list":null,"apc_paid":null,"fwci":0.1039,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.31464124,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1086","last_page":"1091"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11220","display_name":"Water Systems and Optimization","score":0.9164999723434448,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/undersampling","display_name":"Undersampling","score":0.9447717666625977},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.7210118770599365},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7098135352134705},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.6896694302558899},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6444908380508423},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5763065814971924},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5712511539459229},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5152966976165771},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.4945048987865448},{"id":"https://openalex.org/keywords/one-class-classification","display_name":"One-class classification","score":0.4881740212440491},{"id":"https://openalex.org/keywords/statistical-classification","display_name":"Statistical classification","score":0.4629465341567993},{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.4536043405532837},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.42529797554016113},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4191519320011139},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.416043221950531},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.39958739280700684}],"concepts":[{"id":"https://openalex.org/C136536468","wikidata":"https://www.wikidata.org/wiki/Q1225894","display_name":"Undersampling","level":2,"score":0.9447717666625977},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.7210118770599365},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7098135352134705},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.6896694302558899},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6444908380508423},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5763065814971924},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5712511539459229},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5152966976165771},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.4945048987865448},{"id":"https://openalex.org/C34872919","wikidata":"https://www.wikidata.org/wiki/Q7092302","display_name":"One-class classification","level":3,"score":0.4881740212440491},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.4629465341567993},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.4536043405532837},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.42529797554016113},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4191519320011139},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.416043221950531},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.39958739280700684},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/case49997.2022.9926548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/case49997.2022.9926548","pdf_url":null,"source":{"id":"https://openalex.org/S4363607892","display_name":"2022 IEEE 18th International Conference on Automation Science and Engineering (CASE)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 18th International Conference on Automation Science and Engineering (CASE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.6399999856948853}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4308469503","https://openalex.org/W32988189","https://openalex.org/W2904737874","https://openalex.org/W80466363","https://openalex.org/W4389233021","https://openalex.org/W2399571531","https://openalex.org/W2947132063","https://openalex.org/W4288337828","https://openalex.org/W4390415670","https://openalex.org/W4287816717"],"abstract_inverted_index":{"In":[0],"imbalanced":[1],"datasets,":[2],"where":[3],"the":[4,12,81,90,112,123],"majority":[5,31,71,82,91],"class":[6,83],"has":[7],"significantly":[8],"more":[9],"instances":[10,29,68],"than":[11],"minority":[13,73],"class,":[14],"conventional":[15],"classification":[16,109,118,131],"methods":[17],"exhibit":[18],"poor":[19],"minority-class":[20],"detection":[21],"performance":[22],"because":[23],"they":[24],"tend":[25],"to":[26,50,85,115],"classify":[27],"most":[28],"as":[30],"instances.":[32,59],"To":[33],"address":[34],"this":[35,37],"problem,":[36],"paper":[38],"presents":[39],"a":[40,52,117],"general-purpose":[41],"imbalanced-data":[42,130],"preprocessing":[43,96],"method":[44,97,125],"that":[45],"combines":[46],"two":[47,100],"instance-selecting":[48],"techniques":[49,101],"obtain":[51],"clean":[53],"and":[54,72,102],"balanced":[55],"set":[56],"of":[57],"training":[58],"The":[60,75],"first":[61],"technique,":[62,77],"ensemble":[63],"outlier":[64,67],"filtering,":[65],"removes":[66],"from":[69],"both":[70],"classes.":[74],"second":[76],"normalized-cut":[78],"sampling,":[79],"samples":[80],"aiming":[84],"preserve":[86],"its":[87],"distribution":[88],"across":[89],"region.":[92],"Our":[93],"proposed":[94,124],"data":[95,114],"uses":[98],"these":[99],"can":[103],"be":[104],"combined":[105],"with":[106],"any":[107],"general":[108],"methodology":[110],"on":[111],"sub-sampled":[113],"construct":[116],"model.":[119],"Computational":[120],"results":[121],"show":[122],"outperforms":[126],"several":[127],"widely":[128],"used":[129],"methods.":[132]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
