{"id":"https://openalex.org/W7156015805","doi":"https://doi.org/10.48550/arxiv.2604.22337","title":"TabSCM: A practical Framework for Generating Realistic Tabular Data","display_name":"TabSCM: A practical Framework for Generating Realistic Tabular Data","publication_year":2026,"publication_date":"2026-04-24","ids":{"openalex":"https://openalex.org/W7156015805","doi":"https://doi.org/10.48550/arxiv.2604.22337"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.22337","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22337","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.22337","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134697177","display_name":"Sven Jacob","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jacob, Sven","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017702643","display_name":"Bardh Prenkaj","orcid":"https://orcid.org/0000-0002-2991-2279"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prenkaj, Bardh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107807902","display_name":"Weijia Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Weijia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134743727","display_name":"Gjergji Kasneci","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kasneci, Gjergji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5134697177"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.35580000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.35580000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.21080000698566437,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.07800000160932541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.8309999704360962},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.7502999901771545},{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.742900013923645},{"id":"https://openalex.org/keywords/directed-acyclic-graph","display_name":"Directed acyclic graph","score":0.6488999724388123},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.5971999764442444},{"id":"https://openalex.org/keywords/conditional-independence","display_name":"Conditional independence","score":0.527899980545044},{"id":"https://openalex.org/keywords/causal-structure","display_name":"Causal structure","score":0.4722000062465668},{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.4203999936580658}],"concepts":[{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.8309999704360962},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.7502999901771545},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.742900013923645},{"id":"https://openalex.org/C74197172","wikidata":"https://www.wikidata.org/wiki/Q1195339","display_name":"Directed acyclic graph","level":2,"score":0.6488999724388123},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5976999998092651},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.5971999764442444},{"id":"https://openalex.org/C79772020","wikidata":"https://www.wikidata.org/wiki/Q5159264","display_name":"Conditional independence","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C163504300","wikidata":"https://www.wikidata.org/wiki/Q2364925","display_name":"Causal structure","level":2,"score":0.4722000062465668},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.4203999936580658},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3901999890804291},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.37929999828338623},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.375900000333786},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.3303999900817871},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3287999927997589},{"id":"https://openalex.org/C44492722","wikidata":"https://www.wikidata.org/wiki/Q327069","display_name":"Conditional probability","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C146380142","wikidata":"https://www.wikidata.org/wiki/Q1137726","display_name":"Directed graph","level":2,"score":0.3127000033855438},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.30550000071525574},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.2930000126361847},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2840000092983246},{"id":"https://openalex.org/C186215838","wikidata":"https://www.wikidata.org/wiki/Q772232","display_name":"Conditional expectation","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26899999380111694},{"id":"https://openalex.org/C71889745","wikidata":"https://www.wikidata.org/wiki/Q1783264","display_name":"Counterfactual conditional","level":3,"score":0.26579999923706055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.22337","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22337","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.22337","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22337","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5262432098388672,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Most":[0],"tabular-data":[1],"generators":[2],"match":[3],"marginal":[4],"statistics":[5],"yet":[6],"ignore":[7],"causal":[8,28,42,175],"structure,":[9],"leading":[10],"downstream":[11,122],"models":[12,76,155],"to":[13,50,150],"learn":[14],"spurious":[15],"or":[16,59,111],"unfair":[17],"patterns.":[18],"We":[19],"present":[20],"TabSCM,":[21],"a":[22,32,51,168],"mixed-type":[23],"generator":[24],"that":[25],"preserves":[26],"those":[27],"dependencies.":[29],"Starting":[30],"from":[31],"Completed":[33],"Partially":[34],"Directed":[35],"Acyclic":[36],"Graph":[37],"(CPDAG)":[38],"found":[39],"by":[40],"any":[41],"structure":[43],"discovery":[44],"algorithm,":[45],"TabSCM":[46,109,167],"(i)":[47],"orients":[48],"edges":[49],"DAG,":[52],"(ii)":[53],"fits":[54],"root-node":[55],"marginals":[56],"with":[57],"KDE":[58],"categorical":[60,87],"frequencies,":[61],"and":[62,83,95,116,124,132,136,156,163,174],"(iii)":[63],"learns":[64],"topologically":[65],"ordered":[66],"structural":[67],"assignments.":[68],"Such":[69],"assignments":[70],"are":[71],"achieved":[72],"using":[73],"conditional":[74,138],"diffusion":[75],"for":[77,86,160,171],"continuous":[78],"variables":[79],"as":[80],"child":[81],"nodes":[82],"gradient-boosted":[84],"trees":[85],"ones.":[88],"Ancestral":[89],"sampling":[90],"yields":[91],"semantically":[92],"valid":[93],"records":[94],"enables":[96],"exact":[97],"counterfactual":[98],"queries.":[99],"On":[100],"seven":[101],"public":[102],"datasets,":[103],"encompassing":[104],"healthcare,":[105],"finance,":[106],"housing,":[107],"environment,":[108],"matches":[110],"surpasses":[112],"state-of-the-art":[113],"GAN,":[114],"diffusion,":[115],"LLM":[117],"baselines":[118],"in":[119],"statistical":[120],"fidelity,":[121],"utility,":[123],"privacy":[125],"risk,":[126],"while":[127],"also":[128],"cutting":[129],"rule-violation":[130],"rates":[131],"providing":[133],"causally":[134],"meaningful":[135],"robust":[137],"interventions.":[139],"Because":[140],"generation":[141],"is":[142],"decomposed":[143],"into":[144],"explicit":[145],"equations,":[146],"it":[147],"runs":[148],"up":[149],"583$\\times$":[151],"faster":[152],"than":[153],"diffusion-only":[154],"exposes":[157],"interpretable":[158],"knobs":[159],"fairness":[161],"auditing":[162],"policy":[164],"simulation,":[165],"making":[166],"practical":[169],"choice":[170],"realism,":[172],"explainability,":[173],"soundness.":[176]},"counts_by_year":[],"updated_date":"2026-04-28T06:12:00.211691","created_date":"2026-04-28T00:00:00"}
