{"id":"https://openalex.org/W4417154630","doi":"https://doi.org/10.5334/dsj-2025-037","title":"Benchmarking Tabular Data Synthesis: Evaluating Tools, Metrics, and Datasets on Prosumer Hardware for End-Users","display_name":"Benchmarking Tabular Data Synthesis: Evaluating Tools, Metrics, and Datasets on Prosumer Hardware for End-Users","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4417154630","doi":"https://doi.org/10.5334/dsj-2025-037"},"language":"en","primary_location":{"id":"doi:10.5334/dsj-2025-037","is_oa":true,"landing_page_url":"https://doi.org/10.5334/dsj-2025-037","pdf_url":null,"source":{"id":"https://openalex.org/S62969111","display_name":"Data Science Journal","issn_l":"1683-1470","issn":["1683-1470"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320511","host_organization_name":"Ubiquity Press","host_organization_lineage":["https://openalex.org/P4310320511"],"host_organization_lineage_names":["Ubiquity Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Science Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.5334/dsj-2025-037","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120731007","display_name":"Maria Fernanda Davila Restrepo","orcid":"https://orcid.org/0009-0009-8002-7134"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Maria Fernanda Davila Restrepo","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0009-0009-8002-7134","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021695310","display_name":"Benjamin Wollmer","orcid":"https://orcid.org/0000-0002-0545-8040"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benjamin Wollmer","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008225284","display_name":"Fabian Panse","orcid":"https://orcid.org/0000-0002-0675-4116"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fabian Panse","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0002-0675-4116","affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074444329","display_name":"Wolfram Wingerath","orcid":"https://orcid.org/0000-0003-3512-5789"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wolfram Wingerath","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0003-3512-5789","affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5120731007"],"corresponding_institution_ids":[],"apc_list":{"value":350,"currency":"GBP","value_usd":429},"apc_paid":{"value":350,"currency":"GBP","value_usd":429},"fwci":3.4857,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.95236019,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"24","issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.4153999984264374,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.4153999984264374,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.06310000270605087,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.04859999939799309,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8087999820709229},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5271999835968018},{"id":"https://openalex.org/keywords/data-aggregator","display_name":"Data aggregator","score":0.396699994802475},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.3736000061035156},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.3650999963283539},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.364300012588501},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.3553999960422516},{"id":"https://openalex.org/keywords/prosumer","display_name":"Prosumer","score":0.3287999927997589},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.3208000063896179}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8356000185012817},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8087999820709229},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5289999842643738},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5271999835968018},{"id":"https://openalex.org/C82578977","wikidata":"https://www.wikidata.org/wiki/Q16773055","display_name":"Data aggregator","level":3,"score":0.396699994802475},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3939000070095062},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3736000061035156},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.364300012588501},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.3553999960422516},{"id":"https://openalex.org/C2779939747","wikidata":"https://www.wikidata.org/wiki/Q1145220","display_name":"Prosumer","level":3,"score":0.3287999927997589},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.30979999899864197},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C33724603","wikidata":"https://www.wikidata.org/wiki/Q812540","display_name":"Bayesian network","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2655999958515167},{"id":"https://openalex.org/C2985331491","wikidata":"https://www.wikidata.org/wiki/Q5227298","display_name":"Data format","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C2777480716","wikidata":"https://www.wikidata.org/wiki/Q23582796","display_name":"Resource consumption","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C35525427","wikidata":"https://www.wikidata.org/wiki/Q745881","display_name":"Intrusion detection system","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C147494362","wikidata":"https://www.wikidata.org/wiki/Q2078905","display_name":"Troubleshooting","level":2,"score":0.2554999887943268}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.5334/dsj-2025-037","is_oa":true,"landing_page_url":"https://doi.org/10.5334/dsj-2025-037","pdf_url":null,"source":{"id":"https://openalex.org/S62969111","display_name":"Data Science Journal","issn_l":"1683-1470","issn":["1683-1470"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320511","host_organization_name":"Ubiquity Press","host_organization_lineage":["https://openalex.org/P4310320511"],"host_organization_lineage_names":["Ubiquity Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Science Journal","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:d9531e2b51274f84960adf8048c71fdc","is_oa":true,"landing_page_url":"https://doaj.org/article/d9531e2b51274f84960adf8048c71fdc","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data Science Journal, Vol 24, Pp 37-37 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.5334/dsj-2025-037","is_oa":true,"landing_page_url":"https://doi.org/10.5334/dsj-2025-037","pdf_url":null,"source":{"id":"https://openalex.org/S62969111","display_name":"Data Science Journal","issn_l":"1683-1470","issn":["1683-1470"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320511","host_organization_name":"Ubiquity Press","host_organization_lineage":["https://openalex.org/P4310320511"],"host_organization_lineage_names":["Ubiquity Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Science Journal","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2128363840","https://openalex.org/W2806276686","https://openalex.org/W3176539131","https://openalex.org/W4224022798","https://openalex.org/W4312846665","https://openalex.org/W4391054870","https://openalex.org/W4406379937"],"related_works":[],"abstract_inverted_index":{"Synthetic":[0],"data":[1,7,18,20,26,35,173],"is":[2,8],"a":[3,31],"useful":[4],"solution":[5],"when":[6],"scarce":[9],"or":[10],"private,":[11],"as":[12,162],"it":[13],"supports":[14],"reproducible":[15],"experimentation,":[16],"privacy-preserving":[17],"sharing,":[19],"re-purposing,":[21],"and":[22,59,76,96,112,117,120,149,166,172],"robust":[23],"evaluation":[24],"of":[25],"systems.":[27],"This":[28],"study":[29,156],"presents":[30],"benchmark":[32],"for":[33,94],"tabular":[34],"synthesis":[36],"(TDS)":[37],"tools,":[38],"evaluating":[39],"their":[40],"performance":[41],"across":[42,82,126],"six":[43],"critical":[44],"dimensions:":[45],"handling":[46,51,110],"dataset":[47,49],"imbalance,":[48],"augmentation,":[50],"missing":[52],"values,":[53],"privacy,":[54],"machine":[55],"learning":[56],"(ML)":[57],"utility,":[58],"computational":[60,132],"performance.":[61],"Our":[62],"findings":[63],"provide":[64],"practical":[65],"insights":[66],"to":[67,142],"guide":[68],"tool":[69],"selection":[70],"based":[71],"on":[72,90],"specific":[73],"use":[74,87],"cases":[75],"constraints.":[77],"We":[78],"assessed":[79],"13":[80],"tools":[81,105],"15":[83],"datasets":[84],"from":[85],"different":[86],"cases,":[88],"focusing":[89],"prosumer":[91],"hardware":[92],"configurations":[93],"end-users":[95],"highlight":[97],"the":[98,179],"trade-offs":[99],"among":[100],"various":[101],"TDS":[102],"models.":[103],"Sampling-based":[104],"like":[106],"SMOTE":[107],"excelled":[108],"in":[109,153],"imbalance":[111],"efficiency":[113,148],"but":[114,129,139],"lacked":[115],"privacy":[116,150],"variability.":[118],"Hybrid":[119],"Transformer":[121],"models":[122,135],"demonstrated":[123],"strong":[124],"results":[125],"most":[127],"dimensions":[128],"required":[130],"substantial":[131],"resources.":[133],"Diffusion":[134],"achieved":[136],"high":[137],"scores":[138],"were":[140],"complex":[141],"configure,":[143],"while":[144],"Bayesian":[145],"Networks":[146],"offered":[147],"with":[151],"limitations":[152],"utility.":[154],"The":[155,169],"also":[157],"emphasizes":[158],"non-functional":[159],"considerations":[160],"such":[161],"runtime,":[163],"resource":[164],"efficiency,":[165],"configuration":[167],"challenges.":[168],"source":[170],"code":[171],"have":[174],"been":[175],"made":[176],"available":[177],"at":[178],"Github":[180],"Repository.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-09T13:55:54.758798","created_date":"2025-12-09T00:00:00"}
