{"id":"https://openalex.org/W4414110570","doi":"https://doi.org/10.1109/tkde.2025.3608246","title":"GOIO: Generative Oversampling Approach to Class Imbalance and Overlap of Tabular Data","display_name":"GOIO: Generative Oversampling Approach to Class Imbalance and Overlap of Tabular Data","publication_year":2025,"publication_date":"2025-09-10","ids":{"openalex":"https://openalex.org/W4414110570","doi":"https://doi.org/10.1109/tkde.2025.3608246"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2025.3608246","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2025.3608246","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107297585","display_name":"Shiqi Ren","orcid":"https://orcid.org/0009-0004-5800-0166"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shiqi Ren","raw_affiliation_strings":["State Key Laboratory of Synthetical Automation for Process Industries, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Synthetical Automation for Process Industries, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022740106","display_name":"Jinliang Ding","orcid":"https://orcid.org/0000-0003-3735-0672"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinliang Ding","raw_affiliation_strings":["State Key Laboratory of Synthetical Automation for Process Industries, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Synthetical Automation for Process Industries, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057021534","display_name":"Cuie Yang","orcid":"https://orcid.org/0000-0003-1997-1854"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cuie Yang","raw_affiliation_strings":["State Key Laboratory of Synthetical Automation for Process Industries, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Synthetical Automation for Process Industries, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038516431","display_name":"Yiu\u2010ming Cheung","orcid":"https://orcid.org/0000-0001-7629-4648"},"institutions":[{"id":"https://openalex.org/I141568987","display_name":"Hong Kong Baptist University","ror":"https://ror.org/0145fw131","country_code":"HK","type":"education","lineage":["https://openalex.org/I141568987"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yiu-ming Cheung","raw_affiliation_strings":["Department of Computer Science, Hong Kong Baptist University, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Hong Kong Baptist University, Hong Kong, China","institution_ids":["https://openalex.org/I141568987"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5107297585"],"corresponding_institution_ids":["https://openalex.org/I9224756"],"apc_list":null,"apc_paid":null,"fwci":5.4688,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.9620423,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"37","issue":"11","first_page":"6450","last_page":"6463"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14400","display_name":"Medical Coding and Health Information","score":0.9115999937057495,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T14400","display_name":"Medical Coding and Health Information","score":0.9115999937057495,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.919700026512146},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5715000033378601},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.5698000192642212},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5285000205039978},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4848000109195709},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4810999929904938},{"id":"https://openalex.org/keywords/decision-boundary","display_name":"Decision boundary","score":0.46129998564720154},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.44130000472068787}],"concepts":[{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.919700026512146},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6704999804496765},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6308000087738037},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5715000033378601},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.5698000192642212},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5285000205039978},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4848000109195709},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4810999929904938},{"id":"https://openalex.org/C42023084","wikidata":"https://www.wikidata.org/wiki/Q5249231","display_name":"Decision boundary","level":3,"score":0.46129998564720154},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.44130000472068787},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3978999853134155},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C70710897","wikidata":"https://www.wikidata.org/wiki/Q680081","display_name":"Separable space","level":2,"score":0.36820000410079956},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C68693459","wikidata":"https://www.wikidata.org/wiki/Q657586","display_name":"Hyperplane","level":2,"score":0.32910001277923584},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.3203999996185303},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3003999888896942},{"id":"https://openalex.org/C70727504","wikidata":"https://www.wikidata.org/wiki/Q1806878","display_name":"Latent class model","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.296999990940094},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C18653775","wikidata":"https://www.wikidata.org/wiki/Q1333358","display_name":"Joint probability distribution","level":2,"score":0.25540000200271606},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2025.3608246","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2025.3608246","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W102369970","https://openalex.org/W1993220166","https://openalex.org/W2016023958","https://openalex.org/W2087115937","https://openalex.org/W2090135786","https://openalex.org/W2118978333","https://openalex.org/W2122111042","https://openalex.org/W2123458540","https://openalex.org/W2124940540","https://openalex.org/W2132791018","https://openalex.org/W2133506114","https://openalex.org/W2148143831","https://openalex.org/W2343742239","https://openalex.org/W2462401346","https://openalex.org/W2562319768","https://openalex.org/W2620760558","https://openalex.org/W2756359217","https://openalex.org/W2767325013","https://openalex.org/W2963744743","https://openalex.org/W2981515171","https://openalex.org/W2985783697","https://openalex.org/W2997546679","https://openalex.org/W3008812129","https://openalex.org/W3043153112","https://openalex.org/W3082059448","https://openalex.org/W3087890894","https://openalex.org/W3109072952","https://openalex.org/W3120644841","https://openalex.org/W3157699413","https://openalex.org/W3216660278","https://openalex.org/W4220717373","https://openalex.org/W4285212650","https://openalex.org/W4290876805","https://openalex.org/W4292974997","https://openalex.org/W4297241299","https://openalex.org/W4311176176","https://openalex.org/W4400276007","https://openalex.org/W4400762160","https://openalex.org/W4402738944","https://openalex.org/W4402916186"],"related_works":["https://openalex.org/W2766503024","https://openalex.org/W2781247653","https://openalex.org/W4206637278","https://openalex.org/W4386005305","https://openalex.org/W4386214543","https://openalex.org/W3082051559","https://openalex.org/W1969988626","https://openalex.org/W1682621979","https://openalex.org/W2141301039","https://openalex.org/W2300921526"],"abstract_inverted_index":{"Class":[0,58],"imbalance,":[1],"which":[2],"is":[3,19,119],"common":[4],"in":[5,63,140,175,190],"real-world":[6,162],"classification":[7,65],"tasks,":[8],"often":[9,32],"leads":[10],"to":[11,24,56,83,96,129],"biased":[12],"models":[13],"favoring":[14],"majority":[15],"classes.":[16],"Data":[17],"oversampling":[18,30,173],"a":[20,51,71,77,93,109],"widely":[21],"used":[22],"strategy":[23,128],"address":[25],"this":[26,47],"issue.":[27],"However,":[28],"traditional":[29],"methods":[31,174],"generate":[33],"incorrect":[34],"or":[35],"redundant":[36],"instances":[37],"when":[38],"class":[39,85,101,136,192],"overlap":[40,88,102],"occurs,":[41],"increasing":[42],"decision":[43],"boundary":[44],"complexity.":[45],"To":[46],"end,":[48],"we":[49],"propose":[50],"novel":[52],"Generative":[53],"Oversampling":[54],"approach":[55],"addressing":[57,191],"Imbalance":[59],"and":[60,76,87,125,147,163,178,194],"Overlap":[61],"(GOIO)":[62],"the":[64,97,105,117,141,145,151,155,171,184,187],"of":[66,100,186],"tabular":[67],"data.":[68],"GOIO":[69,169],"combines":[70],"Metric-Learning-based":[72],"Variational":[73],"Autoencoder":[74],"(MLVAE)":[75],"Conditional":[78],"Latent":[79],"Diffusion":[80],"Model":[81],"(CLDM)":[82],"handle":[84],"imbalance":[86,193],"effectively.":[89],"The":[90],"MLVAE":[91,156],"employs":[92],"triplet-center":[94],"loss":[95],"adverse":[98],"effects":[99],"by":[103],"transforming":[104],"data":[106,152],"distribution":[107],"into":[108,150],"more":[110],"separable":[111],"latent":[112,132,142],"feature":[113,123],"space.":[114],"Following":[115],"this,":[116],"CLDM":[118,146],"trained":[120],"with":[121],"class-center":[122],"prompting":[124],"classifier-free":[126],"guidance":[127],"capture":[130],"class-specific":[131],"distributions":[133],"accurately.":[134],"Minority":[135],"samples":[137],"are":[138],"synthesized":[139],"space":[143,153],"using":[144],"then":[148],"reconstructed":[149],"via":[154],"decoder.":[157],"Comprehensive":[158],"experiments":[159],"on":[160],"18":[161],"five":[164],"synthetic":[165],"datasets":[166],"demonstrate":[167],"that":[168],"outperforms":[170],"state-of-the-art":[172],"F1-score,":[176],"MCC,":[177],"Accuracy.":[179],"Ablation":[180],"studies":[181],"further":[182],"validate":[183],"effectiveness":[185],"proposed":[188],"contributions":[189],"overlap.":[195]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-11T08:14:18.477133","created_date":"2025-10-10T00:00:00"}
