{"id":"https://openalex.org/W4400375396","doi":"https://doi.org/10.1145/3742435","title":"Diffusion Models for Tabular Data Imputation and Synthetic Data Generation","display_name":"Diffusion Models for Tabular Data Imputation and Synthetic Data Generation","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4400375396","doi":"https://doi.org/10.1145/3742435"},"language":"en","primary_location":{"id":"doi:10.1145/3742435","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3742435","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.02549","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029438736","display_name":"Mario Villaiz\u00e1n-Vallelado","orcid":"https://orcid.org/0009-0002-0754-1742"},"institutions":[{"id":"https://openalex.org/I108103353","display_name":"Universidad de Valladolid","ror":"https://ror.org/01fvbaw18","country_code":"ES","type":"education","lineage":["https://openalex.org/I108103353"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Mario Villaiz\u00e1n-Vallelado","raw_affiliation_strings":["Universidad de Valladolid, Valladolid, Spain and Telef\u00f3nica Scientific Research, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad de Valladolid, Valladolid, Spain and Telef\u00f3nica Scientific Research, Madrid, Spain","institution_ids":["https://openalex.org/I108103353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039099751","display_name":"Matteo Salvatori","orcid":"https://orcid.org/0000-0003-1499-6024"},"institutions":[{"id":"https://openalex.org/I4210097190","display_name":"Telef\u00f3nica (Spain)","ror":"https://ror.org/012f7tj07","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210097190"]},{"id":"https://openalex.org/I4210134591","display_name":"Telefonica Research and Development","ror":"https://ror.org/03qgzzb04","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210097190","https://openalex.org/I4210134591"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Matteo Salvatori","raw_affiliation_strings":["Telef\u00f3nica Scientific Research, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Telef\u00f3nica Scientific Research, Madrid, Spain","institution_ids":["https://openalex.org/I4210097190","https://openalex.org/I4210134591"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023990637","display_name":"Carlos Segura","orcid":"https://orcid.org/0000-0002-5431-5927"},"institutions":[{"id":"https://openalex.org/I4210097190","display_name":"Telef\u00f3nica (Spain)","ror":"https://ror.org/012f7tj07","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210097190"]},{"id":"https://openalex.org/I4210134591","display_name":"Telefonica Research and Development","ror":"https://ror.org/03qgzzb04","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210097190","https://openalex.org/I4210134591"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Carlos Segura","raw_affiliation_strings":["Telef\u00f3nica Scientific Research, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Telef\u00f3nica Scientific Research, Madrid, Spain","institution_ids":["https://openalex.org/I4210097190","https://openalex.org/I4210134591"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015760420","display_name":"Ioannis Arapakis","orcid":null},"institutions":[{"id":"https://openalex.org/I4210097190","display_name":"Telef\u00f3nica (Spain)","ror":"https://ror.org/012f7tj07","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210097190"]},{"id":"https://openalex.org/I4210134591","display_name":"Telefonica Research and Development","ror":"https://ror.org/03qgzzb04","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210097190","https://openalex.org/I4210134591"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Ioannis Arapakis","raw_affiliation_strings":["Telef\u00f3nica Scientific Research, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Telef\u00f3nica Scientific Research, Madrid, Spain","institution_ids":["https://openalex.org/I4210097190","https://openalex.org/I4210134591"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5029438736"],"corresponding_institution_ids":["https://openalex.org/I108103353"],"apc_list":null,"apc_paid":null,"fwci":14.4104,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.98539255,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"19","issue":"6","first_page":"1","last_page":"32"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.04129999876022339,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.04129999876022339,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.04010000079870224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.6439732313156128},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5764192342758179},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.489999920129776},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4168221354484558},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.32593798637390137},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3254518508911133},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.16625750064849854},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1531865894794464},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14186426997184753},{"id":"https://openalex.org/keywords/thermodynamics","display_name":"Thermodynamics","score":0.06264451146125793}],"concepts":[{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.6439732313156128},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5764192342758179},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.489999920129776},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4168221354484558},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.32593798637390137},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3254518508911133},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.16625750064849854},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1531865894794464},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14186426997184753},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.06264451146125793},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3742435","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3742435","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2407.02549","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02549","pdf_url":"https://arxiv.org/pdf/2407.02549","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:uvadoc.uva.es:10324/78812","is_oa":true,"landing_page_url":"https://uvadoc.uva.es/handle/10324/78812","pdf_url":null,"source":{"id":"https://openalex.org/S4306401553","display_name":"UVaDOC UVaDOC University of Valladolid Documentary Repository (University of Valladolid)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I108103353","host_organization_name":"Universidad de Valladolid","host_organization_lineage":["https://openalex.org/I108103353"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"doi:10.48550/arxiv.2407.02549","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.02549","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.02549","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02549","pdf_url":"https://arxiv.org/pdf/2407.02549","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4400375396.pdf"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W2949676527","https://openalex.org/W6888840370","https://openalex.org/W3005285192","https://openalex.org/W6772013027","https://openalex.org/W6963881321","https://openalex.org/W3202428668","https://openalex.org/W2295598076","https://openalex.org/W2094978084","https://openalex.org/W2911978475","https://openalex.org/W3120644841","https://openalex.org/W4296631245","https://openalex.org/W2591882872","https://openalex.org/W3179164182","https://openalex.org/W2995853232","https://openalex.org/W1977098485","https://openalex.org/W2768348081","https://openalex.org/W3154653218","https://openalex.org/W2146950091","https://openalex.org/W3048593389","https://openalex.org/W3176786489","https://openalex.org/W2930926105","https://openalex.org/W2023320048","https://openalex.org/W6675354045","https://openalex.org/W4392741075","https://openalex.org/W4312933868","https://openalex.org/W2129069237","https://openalex.org/W3115920135","https://openalex.org/W4298826872","https://openalex.org/W2022851810","https://openalex.org/W3198435832","https://openalex.org/W3172256710","https://openalex.org/W4312358791","https://openalex.org/W4206912392"],"related_works":["https://openalex.org/W4211215373","https://openalex.org/W3217094455","https://openalex.org/W2989589450","https://openalex.org/W3119637569","https://openalex.org/W2405773734","https://openalex.org/W2791189374","https://openalex.org/W3123325766","https://openalex.org/W2058928557","https://openalex.org/W2898623405","https://openalex.org/W2374234271"],"abstract_inverted_index":{"Data":[0],"imputation":[1,137],"and":[2,20,44,86,108,138,172,202,235,286,302,307,315],"data":[3,15,34,38,67,123,136,140,211,265,283,288,309],"generation":[4,141,266],"have":[5,24,49],"important":[6,192],"applications":[7],"across":[8,36,221],"many":[9],"domains":[10,298],"where":[11,304],"incomplete":[12],"or":[13,121],"missing":[14,135,225],"can":[16,274],"hinder":[17],"accurate":[18],"analysis":[19],"decision-making.":[21],"Diffusion":[22,173],"models":[23,29,158],"emerged":[25],"as":[26,41,82,166,300],"powerful":[27],"generative":[28],"capable":[30],"of":[31,156,184,210,217,224,260],"capturing":[32],"complex":[33],"distributions":[35],"various":[37],"modalities":[39],"such":[40,165,299],"image,":[42],"audio,":[43],"time":[45],"series.":[46],"Recently,":[47],"they":[48],"been":[50],"also":[51],"adapted":[52],"to":[53,96,101,131,190,239,281],"generate":[54,278],"tabular":[55,66],"data.":[56,110],"In":[57],"this":[58],"article,":[59],"we":[60,213],"propose":[61],"a":[62,74,144,149,247,257,268],"diffusion":[63,157],"model":[64,115,130,273,317],"for":[65,290,297,312],"that":[68],"introduces":[69],"three":[70,191],"key":[71],"enhancements:":[72],"(1)":[73,195],"conditioning":[75,91,161,263],"attention":[76,92],"mechanism,":[77],"(2)":[78,199],"an":[79],"encoder\u2013decoder":[80],"transformer":[81,112,160],"the":[83,98,103,106,118,154,182,185,208,215,218,240,264,272],"denoising":[84],"network,":[85],"(3)":[87,203],"dynamic":[88,126],"masking.":[89],"The":[90,111,227],"mechanism":[93],"is":[94],"designed":[95],"improve":[97,287],"model\u2019s":[99],"ability":[100],"capture":[102],"relationship":[104],"between":[105],"condition":[107,119],"synthetic":[109,122,139],"layers":[113],"help":[114],"interactions":[116],"within":[117,143],"(encoder)":[120],"(decoder),":[124],"while":[125,242],"masking":[127],"enables":[128],"our":[129],"efficiently":[132],"handle":[133],"both":[134],"tasks":[142],"unified":[145],"framework.":[146],"We":[147],"conduct":[148],"comprehensive":[150],"evaluation":[151,179],"by":[152],"comparing":[153],"performance":[155,253],"with":[159,188,256],"against":[162],"state-of-the-art":[163],"techniques":[164],"Variational":[167],"Autoencoders,":[168],"Generative":[169],"Adversarial":[170],"Networks,":[171],"Models,":[174],"on":[175,181,267],"benchmark":[176],"datasets.":[177],"Our":[178],"focuses":[180],"assessment":[183],"generated":[186,219],"samples":[187,220],"respect":[189],"criteria,":[193],"namely:":[194],"machine":[196,232],"learning":[197,233],"efficiency,":[198],"statistical":[200,236],"similarity,":[201],"privacy":[204,244],"risk":[205],"mitigation.":[206],"For":[207],"task":[209],"imputation,":[212],"consider":[214],"efficiency":[216,234],"different":[222],"levels":[223],"features.":[226,261],"results":[228],"demonstrate":[229],"average":[230],"superior":[231],"accuracy":[237],"compared":[238],"baselines,":[241],"maintaining":[243],"risks":[245],"at":[246],"comparable":[248],"level,":[249],"particularly":[250],"showing":[251],"increased":[252],"in":[254],"datasets":[255,280],"large":[258],"number":[259],"By":[262],"desired":[269],"target":[270],"variable,":[271],"mitigate":[275],"systemic":[276],"biases,":[277],"augmented":[279],"address":[282],"imbalance":[284],"issues,":[285],"quality":[289],"subsequent":[291],"analysis.":[292],"This":[293],"has":[294],"significant":[295],"implications":[296],"healthcare":[301],"finance,":[303],"accurate,":[305],"unbiased,":[306],"privacy-preserving":[308],"are":[310],"critical":[311],"informed":[313],"decision-making":[314],"fair":[316],"outcomes.":[318]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2024-07-06T00:00:00"}
