{"id":"https://openalex.org/W4389708586","doi":"https://doi.org/10.48550/arxiv.2312.06457","title":"Large Language Models with Retrieval-Augmented Generation for Zero-Shot Disease Phenotyping","display_name":"Large Language Models with Retrieval-Augmented Generation for Zero-Shot Disease Phenotyping","publication_year":2023,"publication_date":"2023-12-11","ids":{"openalex":"https://openalex.org/W4389708586","doi":"https://doi.org/10.48550/arxiv.2312.06457"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2312.06457","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2312.06457","pdf_url":"https://arxiv.org/pdf/2312.06457","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2312.06457","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044008294","display_name":"Will Thompson","orcid":"https://orcid.org/0000-0002-1353-2562"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thompson, Will E.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001397941","display_name":"David Vidmar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vidmar, David M.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084566613","display_name":"Jessica K. De Freitas","orcid":"https://orcid.org/0000-0001-8546-9112"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"De Freitas, Jessica K.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019370037","display_name":"John M. Pfeifer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pfeifer, John M.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066640136","display_name":"Brandon K. Fornwalt","orcid":"https://orcid.org/0000-0002-6231-9442"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fornwalt, Brandon K.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091530162","display_name":"Ruijun Chen","orcid":"https://orcid.org/0000-0001-5281-4143"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ruijun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077093430","display_name":"Gabriel Altay","orcid":"https://orcid.org/0000-0002-4120-2907"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Altay, Gabriel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037550387","display_name":"Kabir Manghnani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Manghnani, Kabir","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065493948","display_name":"Andrew C. Nelsen","orcid":"https://orcid.org/0000-0001-5956-7026"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nelsen, Andrew C.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043656516","display_name":"Kellie Morland","orcid":"https://orcid.org/0000-0003-1800-7983"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Morland, Kellie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021041237","display_name":"Martin C. Stumpe","orcid":"https://orcid.org/0000-0003-1402-6749"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stumpe, Martin C.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5001694059","display_name":"Riccardo Miotto","orcid":"https://orcid.org/0000-0002-7815-6000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miotto, Riccardo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.972599983215332,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.972599983215332,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9666000008583069,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6670953631401062},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.665480375289917},{"id":"https://openalex.org/keywords/disease","display_name":"Disease","score":0.5513741374015808},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.546997606754303},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.5192875266075134},{"id":"https://openalex.org/keywords/clinical-phenotype","display_name":"Clinical phenotype","score":0.5058128833770752},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.502483606338501},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4760470390319824},{"id":"https://openalex.org/keywords/electronic-health-record","display_name":"Electronic health record","score":0.461193323135376},{"id":"https://openalex.org/keywords/health-records","display_name":"Health records","score":0.4420072138309479},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4254826009273529},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.40834566950798035},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.40040040016174316},{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.3516806364059448},{"id":"https://openalex.org/keywords/phenotype","display_name":"Phenotype","score":0.19783419370651245},{"id":"https://openalex.org/keywords/pathology","display_name":"Pathology","score":0.1843578815460205}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6670953631401062},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.665480375289917},{"id":"https://openalex.org/C2779134260","wikidata":"https://www.wikidata.org/wiki/Q12136","display_name":"Disease","level":2,"score":0.5513741374015808},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.546997606754303},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.5192875266075134},{"id":"https://openalex.org/C3020646490","wikidata":"https://www.wikidata.org/wiki/Q25203551","display_name":"Clinical phenotype","level":4,"score":0.5058128833770752},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.502483606338501},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4760470390319824},{"id":"https://openalex.org/C3020144179","wikidata":"https://www.wikidata.org/wiki/Q10871684","display_name":"Electronic health record","level":3,"score":0.461193323135376},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.4420072138309479},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4254826009273529},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.40834566950798035},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40040040016174316},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.3516806364059448},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.19783419370651245},{"id":"https://openalex.org/C142724271","wikidata":"https://www.wikidata.org/wiki/Q7208","display_name":"Pathology","level":1,"score":0.1843578815460205},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2312.06457","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2312.06457","pdf_url":"https://arxiv.org/pdf/2312.06457","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2312.06457","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2312.06457","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2312.06457","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2312.06457","pdf_url":"https://arxiv.org/pdf/2312.06457","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5799999833106995}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4389708586.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386543168","https://openalex.org/W187932805","https://openalex.org/W4392490004","https://openalex.org/W1641026212","https://openalex.org/W2911982698","https://openalex.org/W2323588885","https://openalex.org/W3047677938","https://openalex.org/W2087134418","https://openalex.org/W2078646730","https://openalex.org/W4312053962"],"abstract_inverted_index":{"Identifying":[0],"disease":[1,96,124],"phenotypes":[2],"from":[3],"electronic":[4],"health":[5],"records":[6],"(EHRs)":[7],"is":[8,20],"critical":[9],"for":[10,23,77],"numerous":[11],"secondary":[12],"uses.":[13],"Manually":[14],"encoding":[15],"physician":[16,107],"knowledge":[17],"into":[18],"rules":[19,109],"particularly":[21],"challenging":[22],"rare":[24,95,123],"diseases":[25],"due":[26],"to":[27,70,80,90,121],"inadequate":[28],"EHR":[29],"coding,":[30],"necessitating":[31],"review":[32],"of":[33,112,130],"clinical":[34,51,132],"notes.":[35],"Large":[36],"language":[37],"models":[38],"(LLMs)":[39],"offer":[40],"promise":[41],"in":[42,73,102],"text":[43,68],"understanding":[44],"but":[45],"may":[46],"not":[47],"efficiently":[48],"handle":[49],"real-world":[50],"documentation.":[52],"We":[53,83],"propose":[54],"a":[55,94],"zero-shot":[56],"LLM-based":[57],"method":[58,87,117],"enriched":[59],"by":[60,98],"retrieval-augmented":[61],"generation":[62],"and":[63,134],"MapReduce,":[64],"which":[65],"pre-identifies":[66],"disease-related":[67],"snippets":[69],"be":[71],"used":[72],"parallel":[74],"as":[75,88],"queries":[76],"the":[78,103,119,128],"LLM":[79],"establish":[81],"diagnosis.":[82],"show":[84],"that":[85],"this":[86],"applied":[89],"pulmonary":[91],"hypertension":[92],"(PH),":[93],"characterized":[97],"elevated":[99],"arterial":[100],"pressures":[101],"lungs,":[104],"significantly":[105],"outperforms":[106],"logic":[108],"($F_1$":[110],"score":[111],"0.62":[113],"vs.":[114],"0.75).":[115],"This":[116],"has":[118],"potential":[120],"enhance":[122],"cohort":[125],"identification,":[126],"expanding":[127],"scope":[129],"robust":[131],"research":[133],"care":[135],"gap":[136],"identification.":[137]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":10}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
