{"id":"https://openalex.org/W7127059340","doi":"https://doi.org/10.1007/s41666-026-00229-9","title":"A Scoping Review of Synthetic Data Generation by Language Models in Biomedical Research and Application: Data Utility and Quality Perspectives","display_name":"A Scoping Review of Synthetic Data Generation by Language Models in Biomedical Research and Application: Data Utility and Quality Perspectives","publication_year":2026,"publication_date":"2026-02-02","ids":{"openalex":"https://openalex.org/W7127059340","doi":"https://doi.org/10.1007/s41666-026-00229-9"},"language":"en","primary_location":{"id":"doi:10.1007/s41666-026-00229-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s41666-026-00229-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s41666-026-00229-9.pdf","source":{"id":"https://openalex.org/S4210196546","display_name":"Journal of Healthcare Informatics Research","issn_l":"2509-4971","issn":["2509-4971","2509-498X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Healthcare Informatics Research","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s41666-026-00229-9.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124734328","display_name":"Hanshu Rao","orcid":null},"institutions":[{"id":"https://openalex.org/I94658018","display_name":"University of Memphis","ror":"https://ror.org/01cq23130","country_code":"US","type":"education","lineage":["https://openalex.org/I94658018"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanshu Rao","raw_affiliation_strings":["Department of Computer Science, University of Memphis, Memphis, 38152, TN, United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Memphis, Memphis, 38152, TN, United States","institution_ids":["https://openalex.org/I94658018"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124742955","display_name":"Weisi Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I94658018","display_name":"University of Memphis","ror":"https://ror.org/01cq23130","country_code":"US","type":"education","lineage":["https://openalex.org/I94658018"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weisi Liu","raw_affiliation_strings":["Department of Computer Science, University of Memphis, Memphis, 38152, TN, United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Memphis, Memphis, 38152, TN, United States","institution_ids":["https://openalex.org/I94658018"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124743502","display_name":"Haohan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haohan Wang","raw_affiliation_strings":["School of Information Sciences, University of Illinois Urbana-Champaign, Champaign, 61820, IL, United States"],"affiliations":[{"raw_affiliation_string":"School of Information Sciences, University of Illinois Urbana-Champaign, Champaign, 61820, IL, United States","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124746397","display_name":"I-Chan Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I1313298211","display_name":"St. Jude Children's Research Hospital","ror":"https://ror.org/02r3e0967","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1313298211","https://openalex.org/I2802152183"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"I-Chan Huang","raw_affiliation_strings":["Epidemiology and Cancer Control, St Jude Children\u2019s Research Hospital, Memphis, 38105, TN, United States"],"affiliations":[{"raw_affiliation_string":"Epidemiology and Cancer Control, St Jude Children\u2019s Research Hospital, Memphis, 38105, TN, United States","institution_ids":["https://openalex.org/I1313298211"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124750037","display_name":"Zhe He","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhe He","raw_affiliation_strings":["School of Information, Florida State University, Tallahassee, 32306, FL, United States"],"affiliations":[{"raw_affiliation_string":"School of Information, Florida State University, Tallahassee, 32306, FL, United States","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124720043","display_name":"Xiaolei Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I94658018","display_name":"University of Memphis","ror":"https://ror.org/01cq23130","country_code":"US","type":"education","lineage":["https://openalex.org/I94658018"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiaolei Huang","raw_affiliation_strings":["Department of Computer Science, University of Memphis, Memphis, 38152, TN, United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Memphis, Memphis, 38152, TN, United States","institution_ids":["https://openalex.org/I94658018"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5124720043"],"corresponding_institution_ids":["https://openalex.org/I94658018"],"apc_list":{"value":2490,"currency":"EUR","value_usd":3090},"apc_paid":{"value":2490,"currency":"EUR","value_usd":3090},"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.78266898,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.2971999943256378,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.2971999943256378,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.0949999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.06300000101327896,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.5406000018119812},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4875999987125397},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.4837999939918518},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.44780001044273376},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.44020000100135803},{"id":"https://openalex.org/keywords/data-collection","display_name":"Data collection","score":0.398499995470047},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.38580000400543213},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.37220001220703125}],"concepts":[{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6467999815940857},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6072999835014343},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.5406000018119812},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4875999987125397},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.4837999939918518},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.44780001044273376},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.44020000100135803},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.398499995470047},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.38580000400543213},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.37220001220703125},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.33739998936653137},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.33719998598098755},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3346000015735626},{"id":"https://openalex.org/C189708586","wikidata":"https://www.wikidata.org/wiki/Q1504425","display_name":"Systematic review","level":3,"score":0.3075000047683716},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.28060001134872437},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s41666-026-00229-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s41666-026-00229-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s41666-026-00229-9.pdf","source":{"id":"https://openalex.org/S4210196546","display_name":"Journal of Healthcare Informatics Research","issn_l":"2509-4971","issn":["2509-4971","2509-498X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Healthcare Informatics Research","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s41666-026-00229-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s41666-026-00229-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s41666-026-00229-9.pdf","source":{"id":"https://openalex.org/S4210196546","display_name":"Journal of Healthcare Informatics Research","issn_l":"2509-4971","issn":["2509-4971","2509-498X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Healthcare Informatics Research","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.438050240278244,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W7127059340.pdf"},"referenced_works_count":73,"referenced_works":["https://openalex.org/W2164777277","https://openalex.org/W2891378911","https://openalex.org/W3024357968","https://openalex.org/W3103420681","https://openalex.org/W3118615836","https://openalex.org/W3164109169","https://openalex.org/W3169068430","https://openalex.org/W3178751578","https://openalex.org/W4206094367","https://openalex.org/W4289520498","https://openalex.org/W4313439128","https://openalex.org/W4313644512","https://openalex.org/W4322102006","https://openalex.org/W4323651701","https://openalex.org/W4324031756","https://openalex.org/W4381252028","https://openalex.org/W4386324432","https://openalex.org/W4388725043","https://openalex.org/W4389519059","https://openalex.org/W4389520213","https://openalex.org/W4389612030","https://openalex.org/W4389729357","https://openalex.org/W4390745503","https://openalex.org/W4391164242","https://openalex.org/W4393397034","https://openalex.org/W4394853875","https://openalex.org/W4394975332","https://openalex.org/W4396674388","https://openalex.org/W4397024262","https://openalex.org/W4398223181","https://openalex.org/W4398239341","https://openalex.org/W4399943982","https://openalex.org/W4400037569","https://openalex.org/W4400044222","https://openalex.org/W4400128031","https://openalex.org/W4400336367","https://openalex.org/W4400493343","https://openalex.org/W4401042118","https://openalex.org/W4401070453","https://openalex.org/W4401612165","https://openalex.org/W4401753960","https://openalex.org/W4402527175","https://openalex.org/W4402580824","https://openalex.org/W4402669773","https://openalex.org/W4402683957","https://openalex.org/W4402684073","https://openalex.org/W4402684318","https://openalex.org/W4402722038","https://openalex.org/W4403789625","https://openalex.org/W4404443063","https://openalex.org/W4404533093","https://openalex.org/W4404586423","https://openalex.org/W4404642707","https://openalex.org/W4404703692","https://openalex.org/W4404781981","https://openalex.org/W4404782216","https://openalex.org/W4404867262","https://openalex.org/W4406518786","https://openalex.org/W4406604651","https://openalex.org/W4407020383","https://openalex.org/W4407174976","https://openalex.org/W4407187061","https://openalex.org/W4407565793","https://openalex.org/W4407739613","https://openalex.org/W4408182377","https://openalex.org/W4408296193","https://openalex.org/W4408566602","https://openalex.org/W4408613811","https://openalex.org/W4408725130","https://openalex.org/W4409091111","https://openalex.org/W4410028173","https://openalex.org/W4410415527","https://openalex.org/W4412568371"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"Synthetic":[1],"data":[2,15,32,45,91,102,109,148],"generation":[3,33,92,116],"using":[4],"large":[5],"language":[6],"models":[7,126],"(LLMs)":[8],"demonstrates":[9],"substantial":[10],"promise":[11],"in":[12,21,30,93,147,177],"addressing":[13],"biomedical":[14,22,35,94,178],"challenges":[16],"and":[17,37,48,62,68,76,111,124,137,143,153,156,170],"shows":[18],"increasing":[19],"adoption":[20],"research.":[23,179],"This":[24],"study":[25],"systematically":[26],"reviews":[27],"recent":[28],"advances":[29],"synthetic":[31,90],"for":[34],"applications":[36,176],"clinical":[38],"research,":[39],"focusing":[40],"on":[41,87,164],"how":[42],"LLMs":[43],"address":[44],"scarcity,":[46],"utility,":[47,151],"quality":[49],"issues":[50],"with":[51],"different":[52],"modalities.":[53],"We":[54],"conducted":[55],"a":[56],"scoping":[57],"review":[58],"following":[59],"PRISMA-ScR":[60],"guidelines":[61],"searched":[63],"literature":[64],"published":[65],"between":[66],"2020":[67],"2025":[69],"through":[70],"PubMed,":[71],"ACM,":[72],"Web":[73],"of":[74,81],"Science,":[75],"Google":[77],"Scholar.":[78],"A":[79],"total":[80],"59":[82],"studies":[83],"were":[84,104,129],"included":[85,118],"based":[86],"relevance":[88],"to":[89,173],"contexts.":[95],"Among":[96],"the":[97,100],"reviewed":[98],"studies,":[99],"predominant":[101],"modalities":[103],"unstructured":[105],"texts":[106],"(78.0%),":[107],"tabular":[108],"(13.6%),":[110],"multimodal":[112],"sources":[113],"(8.4%).":[114],"Common":[115],"methods":[117],"LLM":[119],"prompting":[120],"(74.6%),":[121],"fine-tuning":[122],"(20.3%),":[123],"specialized":[125],"(5.1%).":[127],"Evaluations":[128],"heterogeneous:":[130],"intrinsic":[131],"metrics":[132],"(27.1%),":[133],"human-in-the-loop":[134],"assessments":[135],"(44.1%),":[136],"LLM-based":[138],"evaluations":[139],"(13.6%).":[140],"However,":[141],"limitations":[142],"key":[144],"barriers":[145],"persist":[146],"modalities,":[149],"domain":[150],"resource":[152],"model":[154],"accessibility,":[155],"standardized":[157],"evaluation":[158,168],"protocols.":[159],"Future":[160],"efforts":[161],"may":[162],"focus":[163],"developing":[165],"standardized,":[166],"transparent":[167],"frameworks":[169],"expanding":[171],"accessibility":[172],"support":[174],"effective":[175]},"counts_by_year":[],"updated_date":"2026-02-07T06:11:34.122080","created_date":"2026-02-03T00:00:00"}
