{"id":"https://openalex.org/W4307926959","doi":"https://doi.org/10.48550/arxiv.2210.16405","title":"Evaluation of Categorical Generative Models -- Bridging the Gap Between Real and Synthetic Data","display_name":"Evaluation of Categorical Generative Models -- Bridging the Gap Between Real and Synthetic Data","publication_year":2022,"publication_date":"2022-10-28","ids":{"openalex":"https://openalex.org/W4307926959","doi":"https://doi.org/10.48550/arxiv.2210.16405"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2210.16405","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.16405","pdf_url":"https://arxiv.org/pdf/2210.16405","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2210.16405","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065633053","display_name":"Florence Regol","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Regol, Florence","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082520160","display_name":"Anja Kroon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kroon, Anja","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5009031715","display_name":"Mark Coates","orcid":"https://orcid.org/0000-0001-5030-1379"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Coates, Mark","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5065633053"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11875","display_name":"Statistics Education and Methodologies","score":0.9714999794960022,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11875","display_name":"Statistics Education and Methodologies","score":0.9714999794960022,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9684000015258789,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9599000215530396,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.7752818465232849},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.747299313545227},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7284122109413147},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.7177392840385437},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.6671300530433655},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5763978362083435},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5428954362869263},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.5427995920181274},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5323529243469238},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4555846154689789},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.4260648190975189},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.4251770079135895},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.42180299758911133},{"id":"https://openalex.org/keywords/generative-design","display_name":"Generative Design","score":0.41998130083084106}],"concepts":[{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.7752818465232849},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.747299313545227},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7284122109413147},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.7177392840385437},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.6671300530433655},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5763978362083435},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5428954362869263},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.5427995920181274},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5323529243469238},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4555846154689789},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.4260648190975189},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.4251770079135895},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.42180299758911133},{"id":"https://openalex.org/C184408114","wikidata":"https://www.wikidata.org/wiki/Q1502022","display_name":"Generative Design","level":3,"score":0.41998130083084106},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2210.16405","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.16405","pdf_url":"https://arxiv.org/pdf/2210.16405","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2210.16405","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2210.16405","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2210.16405","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.16405","pdf_url":"https://arxiv.org/pdf/2210.16405","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4307926959.pdf","grobid_xml":"https://content.openalex.org/works/W4307926959.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3035069238","https://openalex.org/W4225274307","https://openalex.org/W3182611934","https://openalex.org/W2514264328","https://openalex.org/W4387030601","https://openalex.org/W4376988852","https://openalex.org/W4296551294","https://openalex.org/W4296960511","https://openalex.org/W3177295391","https://openalex.org/W2293526996"],"abstract_inverted_index":{"The":[0],"machine":[1],"learning":[2],"community":[3],"has":[4],"mainly":[5],"relied":[6],"on":[7,22,86,120,183,210],"real":[8],"data":[9,122],"to":[10,30,63,113,137,152,172],"benchmark":[11],"algorithms":[12],"as":[13,53,194],"it":[14],"provides":[15],"compelling":[16],"evidence":[17],"of":[18,35,66,91,103],"model":[19,136],"applicability.":[20],"Evaluation":[21],"synthetic":[23,92,208,212],"datasets":[24],"can":[25,46,79,161,189],"be":[26,47,80,162],"a":[27,32,36,114,134,139,142],"powerful":[28],"tool":[29],"provide":[31],"better":[33],"understanding":[34],"model's":[37],"strengths,":[38],"weaknesses,":[39],"and":[40,100,123,177,215],"overall":[41],"capabilities.":[42],"Gaining":[43],"these":[44],"insights":[45],"particularly":[48],"important":[49],"for":[50],"generative":[51,67,116,135,180,213,219],"modeling":[52,175],"the":[54,64,73,149,179,184,199],"target":[55],"quantity":[56],"is":[57,111],"completely":[58],"unknown.":[59],"Multiple":[60],"issues":[61],"related":[62],"evaluation":[65,84,128,205],"models":[68,181,214],"have":[69],"been":[70],"reported":[71],"in":[72,141],"literature.":[74],"We":[75,118,145,164,202],"argue":[76],"those":[77],"problems":[78],"avoided":[81],"by":[82],"an":[83,125],"based":[85,182],"ground":[87,200],"truth.":[88,201],"General":[89],"criticisms":[90],"experiments":[93,209],"are":[94,97],"that":[95],"they":[96,188],"too":[98,196],"simplified":[99],"not":[101],"representative":[102],"practical":[104],"scenarios.":[105],"As":[106],"such,":[107],"our":[108,204],"experimental":[109],"setting":[110],"tailored":[112],"realistic":[115],"task.":[117],"focus":[119],"categorical":[121,218],"introduce":[124],"appropriately":[126],"scalable":[127],"method.":[129],"Our":[130],"method":[131],"involves":[132],"tasking":[133],"learn":[138],"distribution":[140],"high-dimensional":[143],"setting.":[144],"then":[146],"successively":[147],"bin":[148],"large":[150,167],"space":[151],"obtain":[153],"smaller":[154],"probability":[155,168],"spaces":[156],"where":[157],"meaningful":[158],"statistical":[159],"tests":[160],"applied.":[163],"consider":[165],"increasingly":[166,173],"spaces,":[169],"which":[170],"correspond":[171],"difficult":[174],"tasks":[176],"compare":[178],"highest":[185],"task":[186],"difficulty":[187],"reach":[190],"before":[191],"being":[192,195],"detected":[193],"far":[197],"from":[198],"validate":[203],"procedure":[206],"with":[207],"both":[211],"current":[216],"state-of-the-art":[217],"models.":[220]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
