{"id":"https://openalex.org/W7131394715","doi":"https://doi.org/10.48550/arxiv.2602.19661","title":"PaReGTA: An LLM-based EHR Data Encoding Approach to Capture Temporal Information","display_name":"PaReGTA: An LLM-based EHR Data Encoding Approach to Capture Temporal Information","publication_year":2026,"publication_date":"2026-02-23","ids":{"openalex":"https://openalex.org/W7131394715","doi":"https://doi.org/10.48550/arxiv.2602.19661"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.19661","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19661","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.19661","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092130687","display_name":"Yoon Kihyuk","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yoon, Kihyuk","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126795497","display_name":"Lingchao Mao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Lingchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039227082","display_name":"Catherine D. Chong","orcid":"https://orcid.org/0000-0002-9035-8748"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chong, Catherine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019480561","display_name":"Todd J. Schwedt","orcid":"https://orcid.org/0000-0002-7780-7086"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schwedt, Todd J.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054188179","display_name":"Chia\u2010Chun Chiang","orcid":"https://orcid.org/0000-0001-7802-7172"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chiang, Chia-Chun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126787466","display_name":"Jing Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5092130687"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.8662999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.8662999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10350","display_name":"Electronic Health Records Systems","score":0.039400000125169754,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.01140000019222498,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.777899980545044},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.7192000150680542},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6417999863624573},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4291999936103821},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.38769999146461487},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.3709999918937683},{"id":"https://openalex.org/keywords/health-records","display_name":"Health records","score":0.3646000027656555},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.3578000068664551}],"concepts":[{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.777899980545044},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7634000182151794},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.7192000150680542},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6417999863624573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6107000112533569},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4510999917984009},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4291999936103821},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.38769999146461487},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37380000948905945},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3709999918937683},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.3646000027656555},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.3578000068664551},{"id":"https://openalex.org/C3020144179","wikidata":"https://www.wikidata.org/wiki/Q10871684","display_name":"Electronic health record","level":3,"score":0.35359999537467957},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming language)","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33820000290870667},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.27480000257492065},{"id":"https://openalex.org/C77277458","wikidata":"https://www.wikidata.org/wiki/Q1969246","display_name":"Temporal database","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C3019659195","wikidata":"https://www.wikidata.org/wiki/Q5690566","display_name":"Meaningful use","level":3,"score":0.2605000138282776},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2535000145435333}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.19661","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19661","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.19661","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19661","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Temporal":[0],"information":[1],"in":[2,11,100,170],"structured":[3],"electronic":[4],"health":[5],"records":[6],"(EHRs)":[7],"is":[8,105],"often":[9],"lost":[10],"sparse":[12,158],"one-hot":[13],"or":[14],"count-based":[15],"representations,":[16],"while":[17,164],"sequence":[18],"models":[19,167],"can":[20,96,108],"be":[21],"costly":[22],"and":[23,59,77,107,136],"data-hungry.":[24],"We":[25],"propose":[26],"PaReGTA,":[27],"an":[28],"LLM-based":[29],"encoding":[30],"framework":[31],"that":[32,73],"(i)":[33],"converts":[34],"longitudinal":[35],"EHR":[36],"events":[37],"into":[38,64],"visit-level":[39],"templated":[40],"text":[41],"with":[42],"explicit":[43],"temporal":[44,71],"cues,":[45],"(ii)":[46],"learns":[47],"domain-adapted":[48],"visit":[49,62],"embeddings":[50,63],"via":[51],"lightweight":[52],"contrastive":[53],"fine-tuning":[54],"of":[55,152],"a":[56,65,92,141],"sentence-embedding":[57,113],"model,":[58],"(iii)":[60],"aggregates":[61],"fixed-dimensional":[66],"patient":[67],"representation":[68,138],"using":[69],"hybrid":[70],"pooling":[72],"captures":[74],"both":[75],"recency":[76],"globally":[78],"informative":[79],"visits.":[80],"Because":[81],"PaReGTA":[82,104,156],"does":[83],"not":[84],"require":[85],"training":[86],"from":[87,110,149],"scratch":[88],"but":[89],"instead":[90],"utilizes":[91],"pre-trained":[93],"LLM,":[94],"it":[95],"perform":[97],"well":[98],"even":[99],"data-limited":[101],"cohorts.":[102],"Furthermore,":[103],"model-agnostic":[106],"benefit":[109],"future":[111],"EHR-specialized":[112],"models.":[114],"For":[115],"interpretability,":[116],"we":[117],"introduce":[118],"PaReGTA-RSS":[119],"(Representation":[120],"Shift":[121],"Score),":[122],"which":[123],"quantifies":[124],"clinically":[125],"defined":[126],"factor":[127,134],"importance":[128],"by":[129],"recomputing":[130],"representations":[131],"after":[132],"targeted":[133],"removal":[135],"projecting":[137],"shifts":[139],"through":[140],"machine":[142],"learning":[143],"model.":[144],"On":[145],"39,088":[146],"migraine":[147,161],"patients":[148],"the":[150],"All":[151],"Us":[153],"Research":[154],"Program,":[155],"outperforms":[157],"baselines":[159],"for":[160],"type":[162],"classification":[163],"deep":[165],"sequential":[166],"were":[168],"unstable":[169],"our":[171],"cohort.":[172]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
