{"id":"https://openalex.org/W4226074841","doi":"https://doi.org/10.1177/14604582221077000","title":"A method for machine learning generation of realistic synthetic datasets for validating healthcare applications","display_name":"A method for machine learning generation of realistic synthetic datasets for validating healthcare applications","publication_year":2022,"publication_date":"2022-04-01","ids":{"openalex":"https://openalex.org/W4226074841","doi":"https://doi.org/10.1177/14604582221077000","pmid":"https://pubmed.ncbi.nlm.nih.gov/35414269"},"language":"en","primary_location":{"id":"doi:10.1177/14604582221077000","is_oa":true,"landing_page_url":"https://doi.org/10.1177/14604582221077000","pdf_url":null,"source":{"id":"https://openalex.org/S201800618","display_name":"Health Informatics Journal","issn_l":"1460-4582","issn":["1460-4582","1741-2811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Health Informatics Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1177/14604582221077000","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026657286","display_name":"Theodoros N. Arvanitis","orcid":"https://orcid.org/0000-0001-5473-135X"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Theodoros N Arvanitis","raw_affiliation_strings":["Institute of Digital Healthcare, WMG, University of Warwick, Coventry, UK"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Healthcare, WMG, University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112439007","display_name":"Sean White","orcid":null},"institutions":[{"id":"https://openalex.org/I4210129883","display_name":"NHS Digital","ror":"https://ror.org/03am1eg44","country_code":"GB","type":"government","lineage":["https://openalex.org/I4210129883"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sean White","raw_affiliation_strings":["Clinical Assurance Team, NHS Digital, Leeds, UK"],"affiliations":[{"raw_affiliation_string":"Clinical Assurance Team, NHS Digital, Leeds, UK","institution_ids":["https://openalex.org/I4210129883"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013166634","display_name":"Stuart Harrison","orcid":"https://orcid.org/0000-0003-3873-4512"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Stuart Harrison","raw_affiliation_strings":["Institute of Digital Healthcare, WMG, University of Warwick, Coventry, UK"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Healthcare, WMG, University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044492731","display_name":"Rupert Chaplin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210129883","display_name":"NHS Digital","ror":"https://ror.org/03am1eg44","country_code":"GB","type":"government","lineage":["https://openalex.org/I4210129883"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Rupert Chaplin","raw_affiliation_strings":["Data Science and Innovation, NHS Digital, London, UK"],"affiliations":[{"raw_affiliation_string":"Data Science and Innovation, NHS Digital, London, UK","institution_ids":["https://openalex.org/I4210129883"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012597626","display_name":"George Despotou","orcid":"https://orcid.org/0000-0003-3437-6412"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"George Despotou","raw_affiliation_strings":["Institute of Digital Healthcare, WMG, University of Warwick, Coventry, UK"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Healthcare, WMG, University of Warwick, Coventry, UK","institution_ids":["https://openalex.org/I39555362"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5012597626"],"corresponding_institution_ids":["https://openalex.org/I39555362"],"apc_list":{"value":1500,"currency":"USD","value_usd":1500},"apc_paid":{"value":1500,"currency":"USD","value_usd":1500},"fwci":3.47,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.93346918,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"28","issue":"2","first_page":"14604582221077000","last_page":"14604582221077000"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.9815999865531921,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7821890115737915},{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.7587695717811584},{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.7410076260566711},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5913428068161011},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5475224256515503},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5370165109634399},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5107102394104004},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.493979275226593},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4562775194644928},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.4402347207069397},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4339015781879425},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.13797658681869507}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7821890115737915},{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.7587695717811584},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.7410076260566711},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5913428068161011},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5475224256515503},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5370165109634399},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5107102394104004},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.493979275226593},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4562775194644928},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.4402347207069397},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4339015781879425},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.13797658681869507},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003695","descriptor_name":"Delivery of Health Care","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003695","descriptor_name":"Delivery of Health Care","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003695","descriptor_name":"Delivery of Health Care","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1177/14604582221077000","is_oa":true,"landing_page_url":"https://doi.org/10.1177/14604582221077000","pdf_url":null,"source":{"id":"https://openalex.org/S201800618","display_name":"Health Informatics Journal","issn_l":"1460-4582","issn":["1460-4582","1741-2811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Health Informatics Journal","raw_type":"journal-article"},{"id":"pmid:35414269","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35414269","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Health informatics journal","raw_type":null},{"id":"pmh:oai:wrap.warwick.ac.uk:164617","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400665","display_name":"Warwick Research Archive Portal (University of Warwick)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I39555362","host_organization_name":"University of Warwick","host_organization_lineage":["https://openalex.org/I39555362"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1177/14604582221077000","is_oa":true,"landing_page_url":"https://doi.org/10.1177/14604582221077000","pdf_url":null,"source":{"id":"https://openalex.org/S201800618","display_name":"Health Informatics Journal","issn_l":"1460-4582","issn":["1460-4582","1741-2811"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Health Informatics Journal","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1219560291","display_name":null,"funder_award_id":"Economic","funder_id":"https://openalex.org/F4320334630","funder_display_name":"Economic and Social Research Council"},{"id":"https://openalex.org/G1934935867","display_name":null,"funder_award_id":"Engineering and Physical Sciences R","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4597177185","display_name":null,"funder_award_id":"Wellcome Trust","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320307418","display_name":"Pioneer Fund","ror":"https://ror.org/01mrzcs29"},{"id":"https://openalex.org/F4320311904","display_name":"Wellcome Trust","ror":"https://ror.org/029chgv08"},{"id":"https://openalex.org/F4320312933","display_name":"Department for Business, Energy and Industrial Strategy, UK Government","ror":"https://ror.org/019ya6433"},{"id":"https://openalex.org/F4320313269","display_name":"Scottish Government","ror":"https://ror.org/04v2xmd71"},{"id":"https://openalex.org/F4320319992","display_name":"British Heart Foundation","ror":"https://ror.org/02wdwnk04"},{"id":"https://openalex.org/F4320319994","display_name":"Department of Health and Social Care","ror":"https://ror.org/03sbpja79"},{"id":"https://openalex.org/F4320320853","display_name":"Public Health Agency","ror":"https://ror.org/03ek62e72"},{"id":"https://openalex.org/F4320333685","display_name":"Chief Scientist Office, Scottish Government Health and Social Care Directorate","ror":"https://ror.org/01613vh25"},{"id":"https://openalex.org/F4320334626","display_name":"Medical Research Council","ror":"https://ror.org/03x94j517"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334630","display_name":"Economic and Social Research Council","ror":"https://ror.org/03n0ht308"},{"id":"https://openalex.org/F4320335543","display_name":"Health and Social Care Research and Development Division","ror":null},{"id":"https://openalex.org/F7757660664","display_name":"Health Data Research UK","ror":"https://ror.org/04rtjaj74"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2041810629","https://openalex.org/W2113452810","https://openalex.org/W2396881363","https://openalex.org/W2559655401","https://openalex.org/W2559898506","https://openalex.org/W2612690371","https://openalex.org/W2751687090","https://openalex.org/W2788872322","https://openalex.org/W2805183640","https://openalex.org/W2809400578","https://openalex.org/W2884805522","https://openalex.org/W2899141320","https://openalex.org/W2899705301","https://openalex.org/W2910350464","https://openalex.org/W2911423778","https://openalex.org/W2921914364","https://openalex.org/W2951133006","https://openalex.org/W2963185411","https://openalex.org/W2965372408","https://openalex.org/W2968985094","https://openalex.org/W3039904654","https://openalex.org/W3102123817","https://openalex.org/W4320930577","https://openalex.org/W6608993855"],"related_works":["https://openalex.org/W4254879869","https://openalex.org/W3022576529","https://openalex.org/W4238897586","https://openalex.org/W2628526247","https://openalex.org/W2596401011","https://openalex.org/W2913569734","https://openalex.org/W4401519790","https://openalex.org/W435179959","https://openalex.org/W2702570413","https://openalex.org/W2619091065"],"abstract_inverted_index":{"Digital":[0],"health":[1],"applications":[2],"can":[3],"improve":[4],"quality":[5],"and":[6,89,109,119,124,173],"effectiveness":[7],"of":[8,14,56,67,73,79,114,134,162,170,183,185],"healthcare,":[9],"by":[10],"offering":[11],"a":[12,23,62,74,112,174],"number":[13,133],"new":[15],"tools":[16],"to":[17,38,46,51,85],"users,":[18],"which":[19],"are":[20],"often":[21],"considered":[22],"medical":[24],"device.":[25],"Assuring":[26],"their":[27],"safe":[28],"operation":[29],"requires,":[30],"amongst":[31],"others,":[32],"clinical":[33,43,87],"validation,":[34],"needing":[35],"large":[36],"datasets":[37,47,58,189],"test":[39,177],"them":[40],"in":[41,111],"realistic":[42,80,187],"scenarios.":[44],"Access":[45],"is":[48,59,70,99,164],"challenging,":[49],"due":[50],"patient":[52],"privacy":[53],"concerns.":[54],"Development":[55],"synthetic":[57,81,143,146,188],"seen":[60],"as":[61],"potential":[63],"alternative.":[64],"The":[65,181],"objective":[66],"the":[68,71,77,92,138,142,155,193],"paper":[69],"development":[72],"method":[75],"for":[76,101,141,197],"generation":[78],"datasets,":[82,88],"statistically":[83],"equivalent":[84],"real":[86,156],"demonstrate":[90],"that":[91,148],"Generative":[93],"Adversarial":[94],"Network":[95],"(GAN)":[96],"based":[97],"approach":[98,194],"fit":[100],"purpose.":[102],"A":[103,132,145,167],"generative":[104],"adversarial":[105],"network":[106],"was":[107,158,190],"implemented":[108],"trained,":[110],"series":[113],"six":[115],"experiments,":[116],"using":[117],"numerical":[118],"categorical":[120],"variables,":[121],"including":[122],"ICD-9":[123],"laboratory":[125],"codes,":[126],"from":[127],"three":[128],"clinically":[129],"relevant":[130],"datasets.":[131],"contextual":[135],"steps":[136],"provided":[137],"success":[139],"criteria":[140],"dataset.":[144],"dataset":[147,157],"exhibits":[149],"very":[150,165],"similar":[151],"statistical":[152],"characteristics":[153],"with":[154,192],"generated.":[159],"Pairwise":[160],"association":[161],"variables":[163],"similar.":[166],"high":[168],"degree":[169],"Jaccard":[171],"similarity":[172],"successful":[175],"K-S":[176],"further":[178,198],"support":[179],"this.":[180],"proof":[182],"concept":[184],"generating":[186],"successful,":[191],"showing":[195],"promise":[196],"work.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
