{"id":"https://openalex.org/W4318147842","doi":"https://doi.org/10.1109/bigdata55660.2022.10020487","title":"An AutoEncoder-based Numerical Training Data Augmentation Technique","display_name":"An AutoEncoder-based Numerical Training Data Augmentation Technique","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4318147842","doi":"https://doi.org/10.1109/bigdata55660.2022.10020487"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020487","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020487","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050502016","display_name":"Jueun Jeong","orcid":null},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jueun Jeong","raw_affiliation_strings":["University of Seoul,Dept. Electrical and Computer Engineering,Seoul,South Korea","Dept. Electrical and Computer Engineering, University of Seoul, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"University of Seoul,Dept. Electrical and Computer Engineering,Seoul,South Korea","institution_ids":["https://openalex.org/I124633538"]},{"raw_affiliation_string":"Dept. Electrical and Computer Engineering, University of Seoul, Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037170448","display_name":"Hanseok Jeong","orcid":"https://orcid.org/0000-0001-8357-3047"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hanseok Jeong","raw_affiliation_strings":["University of Seoul,Dept. Electrical and Computer Engineering,Seoul,South Korea","Dept. Electrical and Computer Engineering, University of Seoul, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"University of Seoul,Dept. Electrical and Computer Engineering,Seoul,South Korea","institution_ids":["https://openalex.org/I124633538"]},{"raw_affiliation_string":"Dept. Electrical and Computer Engineering, University of Seoul, Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101538818","display_name":"Hanjoon Kim","orcid":"https://orcid.org/0000-0003-4510-5685"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Han-Joon Kim","raw_affiliation_strings":["University of Seoul,Dept. Electrical and Computer Engineering,Seoul,South Korea","Dept. Electrical and Computer Engineering, University of Seoul, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"University of Seoul,Dept. Electrical and Computer Engineering,Seoul,South Korea","institution_ids":["https://openalex.org/I124633538"]},{"raw_affiliation_string":"Dept. Electrical and Computer Engineering, University of Seoul, Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5050502016"],"corresponding_institution_ids":["https://openalex.org/I124633538"],"apc_list":null,"apc_paid":null,"fwci":0.6236,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.6892911,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"5944","last_page":"5951"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9771000146865845,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.9136664867401123},{"id":"https://openalex.org/keywords/discretization","display_name":"Discretization","score":0.8465883731842041},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7144667506217957},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6719661951065063},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5891546010971069},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.5237662196159363},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5034932494163513},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4962316155433655},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4885769784450531},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.472729355096817},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4401146173477173},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.43720507621765137},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33408939838409424},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3020053505897522},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.168250173330307}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.9136664867401123},{"id":"https://openalex.org/C73000952","wikidata":"https://www.wikidata.org/wiki/Q17007827","display_name":"Discretization","level":2,"score":0.8465883731842041},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7144667506217957},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6719661951065063},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5891546010971069},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.5237662196159363},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5034932494163513},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4962316155433655},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4885769784450531},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.472729355096817},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4401146173477173},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.43720507621765137},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33408939838409424},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3020053505897522},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.168250173330307},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020487","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020487","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1208489","https://openalex.org/W1570448133","https://openalex.org/W1678889691","https://openalex.org/W1959608418","https://openalex.org/W1977556410","https://openalex.org/W2025768430","https://openalex.org/W2104167780","https://openalex.org/W2132791018","https://openalex.org/W2135511047","https://openalex.org/W2144675448","https://openalex.org/W2149230623","https://openalex.org/W2213612645","https://openalex.org/W2302382203","https://openalex.org/W4211120798","https://openalex.org/W4221002080","https://openalex.org/W4249247926","https://openalex.org/W4287815474","https://openalex.org/W4298289240","https://openalex.org/W6603553207","https://openalex.org/W6631919115","https://openalex.org/W6640963894"],"related_works":["https://openalex.org/W2989490741","https://openalex.org/W3092506759","https://openalex.org/W2367545121","https://openalex.org/W4248881655","https://openalex.org/W2482165163","https://openalex.org/W3010890513","https://openalex.org/W120741642","https://openalex.org/W138569904","https://openalex.org/W2390914021","https://openalex.org/W2389417819"],"abstract_inverted_index":{"This":[0],"paper":[1],"aims":[2],"to":[3,19,30,91,125],"automatically":[4],"augment":[5],"numerical":[6,27,64,170],"tabular":[7,81],"data":[8,28,56,171],"by":[9,38,144,169],"using":[10,145,174],"the":[11,21,32,35,40,69,74,87,98,102,126,136,141,159,162],"variational":[12,59],"autoencoder":[13,60],"model.":[14],"For":[15,101],"this,":[16],"we":[17,46,105,134,165],"try":[18],"solve":[20],"problem":[22],"of":[23,34,71,76,86,138,140,150,161],"class":[24],"imbalance":[25],"in":[26,97],"and":[29,73,94,118],"improve":[31],"performance":[33,160],"classification":[36],"model":[37,143],"augmenting":[39],"training":[41],"data.":[42,82],"In":[43,129],"this":[44],"paper,":[45],"propose":[47],"a":[48,79,113,146],"new":[49],"augmentation":[50,57,172],"technique":[51,89],"called":[52,155],"\u2018D-VAE\u2019":[53],"which":[54,110],"performs":[55],"through":[58,109,152],"with":[61],"discretization":[62,93,103],"for":[63,78,131],"columuns;":[65],"D-VAE":[66],"artificially":[67],"increases":[68],"number":[70,75,137,149],"records":[72,111],"columns":[77],"given":[80,114],"The":[83],"main":[84],"features":[85,151],"proposed":[88,163],"are":[90,116],"kperform":[92],"feature":[95,153],"selection":[96,154],"preprocessing":[99],"process.":[100],"process,":[104],"use":[106],"k-means":[107],"algorithm,":[108],"within":[112],"table":[115],"grouped,":[117],"then":[119],"converted":[120],"into":[121],"one-hot":[122],"vectors":[123],"according":[124],"clustering":[127],"results.":[128],"addition,":[130],"memory":[132],"efficiency,":[133],"reduced":[135],"parameters":[139],"VAE":[142],"relatively":[147],"small":[148],"REFCV.":[156],"To":[157],"evaluate":[158],"technique,":[164],"conducted":[166],"various":[167],"experiments":[168],"ratio":[173],"four":[175],"open":[176],"datasets.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
