{"id":"https://openalex.org/W4412537812","doi":"https://doi.org/10.1038/s41746-025-01869-8","title":"Enhancing EHR-based pancreatic cancer prediction with LLM-derived embeddings","display_name":"Enhancing EHR-based pancreatic cancer prediction with LLM-derived embeddings","publication_year":2025,"publication_date":"2025-07-21","ids":{"openalex":"https://openalex.org/W4412537812","doi":"https://doi.org/10.1038/s41746-025-01869-8","pmid":"https://pubmed.ncbi.nlm.nih.gov/40691317"},"language":"en","primary_location":{"id":"doi:10.1038/s41746-025-01869-8","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s41746-025-01869-8","pdf_url":"https://www.nature.com/articles/s41746-025-01869-8.pdf","source":{"id":"https://openalex.org/S4210195431","display_name":"npj Digital Medicine","issn_l":"2398-6352","issn":["2398-6352"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"npj Digital Medicine","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.nature.com/articles/s41746-025-01869-8.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043308629","display_name":"Jiheum Park","orcid":"https://orcid.org/0000-0003-4919-7249"},"institutions":[{"id":"https://openalex.org/I2799503643","display_name":"Columbia University Irving Medical Center","ror":"https://ror.org/01esghr10","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2799503643"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jiheum Park","raw_affiliation_strings":["Department of Medicine, Columbia University Irving Medical Center, New York, NY, USA. jp4147@cumc.columbia.edu"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Medicine, Columbia University Irving Medical Center, New York, NY, USA. jp4147@cumc.columbia.edu","institution_ids":["https://openalex.org/I2799503643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077766702","display_name":"Jason Patterson","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Patterson","raw_affiliation_strings":["Department of Biomedical Informatics, Columbia University, New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105962116","display_name":"Jose M. Acitores Cortina","orcid":null},"institutions":[{"id":"https://openalex.org/I1282927834","display_name":"Cedars-Sinai Medical Center","ror":"https://ror.org/02pammg90","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1282927834"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jose M. Acitores Cortina","raw_affiliation_strings":["Cedars-Sinai Cancer, Cedars-Sinai Medical Center, Los Angeles, CA, USA","Department of Computational Biomedicine, Cedars-Sinai Medical Center, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cedars-Sinai Cancer, Cedars-Sinai Medical Center, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1282927834"]},{"raw_affiliation_string":"Department of Computational Biomedicine, Cedars-Sinai Medical Center, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1282927834"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040744605","display_name":"Tian Gu","orcid":"https://orcid.org/0000-0003-3989-6927"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tian Gu","raw_affiliation_strings":["Department of Biostatistics, Columbia Mailman School of Public Health, New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Biostatistics, Columbia Mailman School of Public Health, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055200350","display_name":"Chin Hur","orcid":"https://orcid.org/0000-0002-2819-7576"},"institutions":[{"id":"https://openalex.org/I2799503643","display_name":"Columbia University Irving Medical Center","ror":"https://ror.org/01esghr10","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2799503643"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chin Hur","raw_affiliation_strings":["Department of Medicine, Columbia University Irving Medical Center, New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Medicine, Columbia University Irving Medical Center, New York, NY, USA","institution_ids":["https://openalex.org/I2799503643"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086753691","display_name":"Nicholas P. Tatonetti","orcid":"https://orcid.org/0000-0002-2700-2597"},"institutions":[{"id":"https://openalex.org/I1282927834","display_name":"Cedars-Sinai Medical Center","ror":"https://ror.org/02pammg90","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1282927834"]},{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nicholas Tatonetti","raw_affiliation_strings":["Cedars-Sinai Cancer, Cedars-Sinai Medical Center, Los Angeles, CA, USA","Department of Biomedical Informatics, Columbia University, New York, NY, USA","Department of Computational Biomedicine, Cedars-Sinai Medical Center, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cedars-Sinai Cancer, Cedars-Sinai Medical Center, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1282927834"]},{"raw_affiliation_string":"Department of Biomedical Informatics, Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]},{"raw_affiliation_string":"Department of Computational Biomedicine, Cedars-Sinai Medical Center, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1282927834"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5043308629"],"corresponding_institution_ids":["https://openalex.org/I2799503643"],"apc_list":{"value":3060,"currency":"USD","value_usd":3060},"apc_paid":{"value":3060,"currency":"USD","value_usd":3060},"fwci":15.0671,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.98836306,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"8","issue":"1","first_page":"465","last_page":"465"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10231","display_name":"Pancreatic and Hepatic Oncology Research","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12422","display_name":"Radiomics and Machine Learning in Medical Imaging","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pancreatic-cancer","display_name":"Pancreatic cancer","score":0.6244603395462036},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.40504467487335205},{"id":"https://openalex.org/keywords/cancer","display_name":"Cancer","score":0.380356103181839},{"id":"https://openalex.org/keywords/internal-medicine","display_name":"Internal medicine","score":0.36981433629989624},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.3567352890968323}],"concepts":[{"id":"https://openalex.org/C2780210213","wikidata":"https://www.wikidata.org/wiki/Q212961","display_name":"Pancreatic cancer","level":3,"score":0.6244603395462036},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40504467487335205},{"id":"https://openalex.org/C121608353","wikidata":"https://www.wikidata.org/wiki/Q12078","display_name":"Cancer","level":2,"score":0.380356103181839},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.36981433629989624},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3567352890968323}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1038/s41746-025-01869-8","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s41746-025-01869-8","pdf_url":"https://www.nature.com/articles/s41746-025-01869-8.pdf","source":{"id":"https://openalex.org/S4210195431","display_name":"npj Digital Medicine","issn_l":"2398-6352","issn":["2398-6352"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"npj Digital Medicine","raw_type":"journal-article"},{"id":"pmid:40691317","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40691317","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"NPJ digital medicine","raw_type":null},{"id":"pmh:oai:doaj.org/article:254ee181164c40d5b13d2ff441ae5bc2","is_oa":true,"landing_page_url":"https://doaj.org/article/254ee181164c40d5b13d2ff441ae5bc2","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"npj Digital Medicine, Vol 8, Iss 1, Pp 1-9 (2025)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:11091945","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12280092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1038/s41746-025-01869-8","is_oa":true,"landing_page_url":"https://doi.org/10.1038/s41746-025-01869-8","pdf_url":"https://www.nature.com/articles/s41746-025-01869-8.pdf","source":{"id":"https://openalex.org/S4210195431","display_name":"npj Digital Medicine","issn_l":"2398-6352","issn":["2398-6352"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"npj Digital Medicine","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Good health and well-being","id":"https://metadata.un.org/sdg/3","score":0.4300000071525574}],"awards":[{"id":"https://openalex.org/G5662783340","display_name":null,"funder_award_id":"K25 CA267052","funder_id":"https://openalex.org/F4320337351","funder_display_name":"National Cancer Institute"},{"id":"https://openalex.org/G5733695112","display_name":null,"funder_award_id":"K25CA267052","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G8649923797","display_name":null,"funder_award_id":"K25CA267052","funder_id":"https://openalex.org/F4320337351","funder_display_name":"National Cancer Institute"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337351","display_name":"National Cancer Institute","ror":"https://ror.org/040gcmg81"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412537812.pdf","grobid_xml":"https://content.openalex.org/works/W4412537812.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W2951635356","https://openalex.org/W2965010580","https://openalex.org/W3044633591","https://openalex.org/W3118983547","https://openalex.org/W4281256872","https://openalex.org/W4283362336","https://openalex.org/W4303699900","https://openalex.org/W4321764356","https://openalex.org/W4375858857","https://openalex.org/W4387819650","https://openalex.org/W4390745503","https://openalex.org/W4394782455","https://openalex.org/W4399738139","https://openalex.org/W4400378068","https://openalex.org/W4406431707"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3031052312","https://openalex.org/W4389568370","https://openalex.org/W3032375762","https://openalex.org/W1995515455","https://openalex.org/W2080531066","https://openalex.org/W3108674512","https://openalex.org/W1506200166"],"abstract_inverted_index":{"Pancreatic":[0],"cancer":[1],"(PC)":[2],"is":[3],"often":[4],"diagnosed":[5],"late,":[6],"as":[7,138],"early":[8,39],"symptoms":[9],"and":[10,16,68,82,99,116],"effective":[11],"screening":[12],"tools":[13],"are":[14],"lacking,":[15],"genetic":[17,127],"or":[18,125],"familial":[19],"factors":[20,114,124],"explain":[21],"only":[22],"~10%":[23],"of":[24,52],"cases.":[25],"Leveraging":[26],"longitudinal":[27],"electronic":[28],"health":[29],"record":[30],"(EHR)":[31],"data":[32,88],"may":[33,136],"offer":[34],"a":[35,43,104],"promising":[36],"avenue":[37],"for":[38,142],"detection.":[40],"We":[41],"developed":[42],"predictive":[44,107],"model":[45,49,102,135],"using":[46,111],"large":[47],"language":[48],"(LLM)-derived":[50],"embeddings":[51,72],"medical":[53],"condition":[54],"names":[55],"to":[56,80,84,97],"enhance":[57],"learning":[58],"from":[59,78,89],"EHR":[60],"data.":[61],"Across":[62],"two":[63],"sites-Columbia":[64],"University":[65],"Medical":[66,70],"Center":[67],"Cedars-Sinai":[69],"Center-LLM":[71],"improved":[73,95],"6-12":[74],"month":[75],"prediction":[76],"AUROCs":[77,96],"0.60":[79],"0.67":[81],"0.82":[83,98],"0.86,":[85],"respectively.":[86],"Excluding":[87],"0-3":[90],"months":[91],"before":[92],"diagnosis":[93],"further":[94],"0.89.":[100],"Our":[101],"achieved":[103],"higher":[105],"positive":[106],"value":[108],"(0.141)":[109],"than":[110],"traditional":[112],"risk":[113,123],"(0.004),":[115],"identified":[117],"many":[118],"PC":[119],"patients":[120],"without":[121],"these":[122],"known":[126],"variants.":[128],"These":[129],"findings":[130],"suggest":[131],"that":[132],"the":[133],"EHR-based":[134],"serve":[137],"an":[139],"independent":[140],"approach":[141],"identifying":[143],"high-risk":[144],"individuals.":[145]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":4}],"updated_date":"2026-07-01T08:55:40.977307","created_date":"2025-10-10T00:00:00"}
