{"id":"https://openalex.org/W4415428556","doi":"https://doi.org/10.3233/faia251280","title":"Generating Clinically Realistic EHR Data via a Hierarchy- and Semantics-Guided Transformer","display_name":"Generating Clinically Realistic EHR Data via a Hierarchy- and Semantics-Guided Transformer","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428556","doi":"https://doi.org/10.3233/faia251280"},"language":null,"primary_location":{"id":"doi:10.3233/faia251280","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251280","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251280","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091669273","display_name":"Guanglin Zhou","orcid":"https://orcid.org/0000-0002-5979-0057"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Guanglin Zhou","raw_affiliation_strings":["University of Queensland"],"affiliations":[{"raw_affiliation_string":"University of Queensland","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033997225","display_name":"Sebastiano Barbieri","orcid":"https://orcid.org/0000-0002-5919-372X"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Sebastiano Barbieri","raw_affiliation_strings":["University of Queensland"],"affiliations":[{"raw_affiliation_string":"University of Queensland","institution_ids":["https://openalex.org/I165143802"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5091669273"],"corresponding_institution_ids":["https://openalex.org/I165143802"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.5282622,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9592000246047974,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5051000118255615},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4763999879360199},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.47130000591278076},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.46869999170303345},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4052000045776367},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.400299996137619},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.3921999931335449},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.3903000056743622},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3345000147819519},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.3273000121116638}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7202000021934509},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5051000118255615},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4763999879360199},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.47130000591278076},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.46869999170303345},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43459999561309814},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4083000123500824},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40720000863075256},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4052000045776367},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.400299996137619},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.3921999931335449},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.3903000056743622},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3345000147819519},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.32499998807907104},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.31790000200271606},{"id":"https://openalex.org/C145642194","wikidata":"https://www.wikidata.org/wiki/Q870895","display_name":"Health informatics","level":3,"score":0.302700012922287},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.30239999294281006},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.29820001125335693},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.2971000075340271},{"id":"https://openalex.org/C21338462","wikidata":"https://www.wikidata.org/wiki/Q1662581","display_name":"Information model","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2962000072002411},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.29490000009536743},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C206497026","wikidata":"https://www.wikidata.org/wiki/Q1753883","display_name":"SNOMED CT","level":3,"score":0.27570000290870667},{"id":"https://openalex.org/C3020144179","wikidata":"https://www.wikidata.org/wiki/Q10871684","display_name":"Electronic health record","level":3,"score":0.27379998564720154},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.26969999074935913},{"id":"https://openalex.org/C45827449","wikidata":"https://www.wikidata.org/wiki/Q5270338","display_name":"Diagnosis code","level":3,"score":0.26930001378059387},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251280","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251280","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251280","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251280","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Generating":[0],"realistic":[1],"synthetic":[2,60,174],"electronic":[3],"health":[4],"records":[5],"(EHRs)":[6],"holds":[7],"tremendous":[8],"promise":[9],"for":[10,97],"accelerating":[11],"healthcare":[12],"research,":[13],"facilitating":[14],"AI":[15],"model":[16,138,148],"development":[17],"and":[18,50,68,83,94,109,115,162],"enhancing":[19],"patient":[20,61,178],"privacy.":[21],"However,":[22],"existing":[23],"generative":[24,99],"methods":[25],"typically":[26],"treat":[27],"EHRs":[28],"as":[29,180,182,188],"flat":[30],"sequences":[31,62],"of":[32,46,173],"discrete":[33],"medical":[34],"codes.":[35],"This":[36],"approach":[37],"overlooks":[38],"two":[39],"critical":[40],"aspects:":[41],"the":[42,51,81,98,142,149,160,170],"inherent":[43,153],"hierarchical":[44,93,104],"organization":[45],"clinical":[47,66,74,113,136,151],"coding":[48],"systems":[49],"rich":[52],"semantic":[53,95,130],"context":[54],"provided":[55],"by":[56],"code":[57,193],"descriptions.":[58],"Consequently,":[59],"often":[63],"lack":[64],"high":[65],"fidelity":[67],"have":[69],"limited":[70],"utility":[71],"in":[72,154],"downstream":[73,185],"tasks.":[75],"In":[76],"this":[77],"paper,":[78],"we":[79],"propose":[80],"Hierarchy-":[82],"Semantics-Guided":[84],"Transformer":[85],"(HiSGT),":[86],"a":[87,103,117,134],"novel":[88],"framework":[89],"that":[90,166],"leverages":[91],"both":[92],"information":[96],"process.":[100],"HiSGT":[101,167],"constructs":[102],"graph":[105,118],"to":[106,121,145],"encode":[107],"parent-child":[108],"sibling":[110],"relationships":[111],"among":[112],"codes":[114],"employs":[116],"neural":[119],"network":[120],"derive":[122],"hierarchy-aware":[123],"embeddings.":[124],"These":[125],"are":[126],"then":[127],"fused":[128],"with":[129,176],"embeddings":[131],"extracted":[132],"from":[133],"pre-trained":[135],"language":[137],"(e.g.,":[139],"ClinicalBERT),":[140],"enabling":[141],"Transformer-based":[143],"generator":[144],"more":[146],"accurately":[147],"nuanced":[150],"patterns":[152],"real":[155,177],"EHRs.":[156],"Extensive":[157],"experiments":[158],"on":[159],"MIMIC-III":[161],"MIMIC-IV":[163],"datasets":[164],"demonstrate":[165],"significantly":[168],"improves":[169],"statistical":[171],"alignment":[172],"data":[175],"records,":[179],"well":[181],"supports":[183],"robust":[184],"applications":[186],"such":[187],"chronic":[189],"disease":[190],"classification.":[191],"The":[192],"is":[194],"available":[195],"at":[196],"https://github.com/jameszhou-gl/HiSGT.":[197]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
