{"id":"https://openalex.org/W4396914046","doi":"https://doi.org/10.1109/access.2024.3434582","title":"Synthetic Tabular Data Validation: A Divergence-Based Approach","display_name":"Synthetic Tabular Data Validation: A Divergence-Based Approach","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4396914046","doi":"https://doi.org/10.1109/access.2024.3434582"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3434582","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3434582","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2024.3434582","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031960375","display_name":"Patricia A. Apell\u00e1niz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patricia A. Apell\u00e1niz","raw_affiliation_strings":["Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0002-8604-9758","affiliations":[{"raw_affiliation_string":"Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103426933","display_name":"Ana Jim\u00e9nez","orcid":"https://orcid.org/0009-0003-8679-4680"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ana Jim\u00e9nez","raw_affiliation_strings":["Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0009-0003-8679-4680","affiliations":[{"raw_affiliation_string":"Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068385983","display_name":"Borja Arroyo Galende","orcid":"https://orcid.org/0000-0001-5035-0998"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Borja Arroyo Galende","raw_affiliation_strings":["Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0001-5035-0998","affiliations":[{"raw_affiliation_string":"Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045383869","display_name":"Juan Parras","orcid":"https://orcid.org/0000-0002-7028-3179"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juan Parras","raw_affiliation_strings":["Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0002-7028-3179","affiliations":[{"raw_affiliation_string":"Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008317106","display_name":"Santiago Zazo","orcid":"https://orcid.org/0000-0001-9073-7927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Santiago Zazo","raw_affiliation_strings":["Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0001-9073-7927","affiliations":[{"raw_affiliation_string":"Information Processing and Telecommunications Center, ETS Ingenieros de Telecomunicaci&#x00F3;n, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":11.577,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.98963214,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"12","issue":null,"first_page":"103895","last_page":"103907"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11871","display_name":"Advanced Statistical Methods and Models","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11871","display_name":"Advanced Statistical Methods and Models","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12261","display_name":"Statistical Mechanics and Entropy","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11918","display_name":"Forecasting Techniques and Applications","score":0.9722999930381775,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6256958842277527},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.612609326839447},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.5940240025520325},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5231010913848877},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5083414912223816},{"id":"https://openalex.org/keywords/kullback\u2013leibler-divergence","display_name":"Kullback\u2013Leibler divergence","score":0.49360787868499756},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4681648910045624},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4389435052871704},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3489668369293213},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3264215588569641},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2448176145553589},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1744716465473175}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6256958842277527},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.612609326839447},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5940240025520325},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5231010913848877},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5083414912223816},{"id":"https://openalex.org/C171752962","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Kullback\u2013Leibler divergence","level":2,"score":0.49360787868499756},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4681648910045624},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4389435052871704},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3489668369293213},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3264215588569641},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2448176145553589},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1744716465473175},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/access.2024.3434582","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3434582","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2405.07822","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.07822","pdf_url":"https://arxiv.org/pdf/2405.07822","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:doaj.org/article:68fef6c934fa44269e605296e59605d1","is_oa":true,"landing_page_url":"https://doaj.org/article/68fef6c934fa44269e605296e59605d1","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 103895-103907 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3434582","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3434582","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4000000059604645,"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17"}],"awards":[{"id":"https://openalex.org/G1544627512","display_name":"Genomics and Personalized Medicine for all though Artificial Intelligence in Haematological Diseases","funder_award_id":"101017549","funder_id":"https://openalex.org/F4320338342","funder_display_name":"H2020 Health"},{"id":"https://openalex.org/G294844132","display_name":null,"funder_award_id":"101017549","funder_id":"https://openalex.org/F4320337664","funder_display_name":"H2020 Societal Challenges"},{"id":"https://openalex.org/G4307641027","display_name":"Genomics and Personalized Medicine for all though Artificial Intelligence in Haematological Diseases","funder_award_id":"101017549","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4462577783","display_name":"Synthetic generation of hematological data over federated computing frameworks","funder_award_id":"101095530","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320337664","display_name":"H2020 Societal Challenges","ror":null},{"id":"https://openalex.org/F4320338342","display_name":"H2020 Health","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2101105183","https://openalex.org/W2565167788","https://openalex.org/W2963034797","https://openalex.org/W2963679759","https://openalex.org/W2978126559","https://openalex.org/W3004621080","https://openalex.org/W3022574011","https://openalex.org/W3080082779","https://openalex.org/W3087767709","https://openalex.org/W3088465102","https://openalex.org/W3096062691","https://openalex.org/W3103102495","https://openalex.org/W3213204049","https://openalex.org/W4234504725","https://openalex.org/W4315435229","https://openalex.org/W4386082956","https://openalex.org/W4387913006","https://openalex.org/W4394838972","https://openalex.org/W6718379498","https://openalex.org/W6761205521","https://openalex.org/W6765451912","https://openalex.org/W6765779288","https://openalex.org/W6780008085","https://openalex.org/W6787732974","https://openalex.org/W6790857497","https://openalex.org/W6804851780"],"related_works":["https://openalex.org/W2133194887","https://openalex.org/W2105321464","https://openalex.org/W2887774187","https://openalex.org/W2388220555","https://openalex.org/W3048739257","https://openalex.org/W1665563134","https://openalex.org/W2963604926","https://openalex.org/W1616881371","https://openalex.org/W1520875569","https://openalex.org/W2199957582"],"abstract_inverted_index":{"The":[0,174,190],"ever-increasing":[1],"use":[2,159],"of":[3,73,95,116,136,176,184],"generative":[4],"models":[5],"in":[6,102,160,225,247],"various":[7,248],"fields":[8],"where":[9],"tabular":[10,237],"data":[11,52,238,245],"is":[12,166,179],"used":[13],"highlights":[14],"the":[15,25,71,93,113,128,134,144,150,161,240],"need":[16],"for":[17,58,66,200,206],"robust":[18],"and":[19,29,38,42,118,149,168,217,239],"standardized":[20],"validation":[21,110,246],"metrics":[22],"to":[23,70,91,107,126,242],"assess":[24],"similarity":[26],"between":[27,51,131],"real":[28,117],"synthetic":[30,119,220,244],"data.":[31,120],"Current":[32],"methods":[33],"lack":[34],"a":[35,55,84,104,109,123,171,182,204,214,231],"unified":[36],"framework":[37],"rely":[39],"on":[40,213],"diverse":[41],"often":[43],"inconclusive":[44],"statistical":[45],"measures.":[46],"Divergences,":[47],"which":[48],"quantify":[49],"discrepancies":[50],"distributions,":[53,202],"offer":[54],"promising":[56],"avenue":[57],"validation.":[59],"However,":[60],"traditional":[61],"approaches":[62],"calculate":[63,141],"divergences":[64,196],"independently":[65],"each":[67],"feature":[68],"due":[69],"complexity":[72],"joint":[74,114],"distribution":[75,115,188],"modeling.":[76],"This":[77,228],"paper":[78],"addresses":[79],"this":[80,177],"challenge":[81],"by":[82],"proposing":[83],"novel":[85],"approach":[86,178],"that":[87],"uses":[88],"divergence":[89,105,148,155,165],"estimation":[90],"overcome":[92],"limitations":[94],"marginal":[96],"comparisons.":[97],"Our":[98],"core":[99],"contribution":[100,233],"lies":[101],"applying":[103],"estimator":[106],"build":[108],"metric":[111],"considering":[112],"We":[121,139],"leverage":[122],"probabilistic":[124],"classifier":[125],"approximate":[127],"density":[129],"ratio":[130],"datasets,":[132],"allowing":[133],"capture":[135],"complex":[137],"relations.":[138],"specifically":[140],"two":[142],"divergences:":[143],"well-known":[145],"Kullback-Leibler":[146],"(KL)":[147],"Jensen-Shannon":[151],"(JS)":[152],"divergence.":[153],"KL":[154],"offers":[156,230],"an":[157],"established":[158],"field,":[162],"while":[163],"JS":[164],"symmetric":[167],"bounded,":[169],"providing":[170],"reliable":[172],"metric.":[173],"efficacy":[175],"demonstrated":[180],"through":[181],"series":[183],"experiments":[185],"with":[186,197,234],"varying":[187],"complexities.":[189],"initial":[191],"phase":[192],"involves":[193],"comparing":[194],"estimated":[195],"analytical":[198],"solutions":[199],"simple":[201],"setting":[203],"benchmark":[205],"accuracy.":[207],"Finally,":[208],"we":[209],"validate":[210],"our":[211],"method":[212],"real-world":[215],"dataset":[216],"its":[218,223],"corresponding":[219],"counterpart,":[221],"showcasing":[222],"effectiveness":[224],"practical":[226],"applications.":[227],"research":[229],"significant":[232],"applicability":[235],"beyond":[236],"potential":[241],"improve":[243],"fields.":[249]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":3}],"updated_date":"2026-06-30T13:55:48.251075","created_date":"2025-10-10T00:00:00"}
