{"id":"https://openalex.org/W7160703722","doi":"https://doi.org/10.1016/j.jbi.2026.105053","title":"Optimising clinical information extraction: a comparative study of retrieval-augmented generation techniques in clinical notes","display_name":"Optimising clinical information extraction: a comparative study of retrieval-augmented generation techniques in clinical notes","publication_year":2026,"publication_date":"2026-05-09","ids":{"openalex":"https://openalex.org/W7160703722","doi":"https://doi.org/10.1016/j.jbi.2026.105053","pmid":"https://pubmed.ncbi.nlm.nih.gov/42114794"},"language":"en","primary_location":{"id":"doi:10.1016/j.jbi.2026.105053","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.jbi.2026.105053","pdf_url":null,"source":{"id":"https://openalex.org/S11622463","display_name":"Journal of Biomedical Informatics","issn_l":"1532-0464","issn":["1532-0464","1532-0480"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Biomedical Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.jbi.2026.105053","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135789620","display_name":"Hengyi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hengyi Zhang","raw_affiliation_strings":["Centre for Digital Transformation, School of Computing and Information Technology, University of Wollongong, Wollongong, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Digital Transformation, School of Computing and Information Technology, University of Wollongong, Wollongong, Australia","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135761805","display_name":"Dinithi Vithanage","orcid":null},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Dinithi Vithanage","raw_affiliation_strings":["Centre for Digital Transformation, School of Computing and Information Technology, University of Wollongong, Wollongong, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Digital Transformation, School of Computing and Information Technology, University of Wollongong, Wollongong, Australia","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040465739","display_name":"Chao Deng","orcid":"https://orcid.org/0000-0003-1147-5741"},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Chao Deng","raw_affiliation_strings":["School of Medical, Indigenous and Health Sciences, University of Wollongong, Wollongong, Australia"],"raw_orcid":"https://orcid.org/0000-0003-1147-5741","affiliations":[{"raw_affiliation_string":"School of Medical, Indigenous and Health Sciences, University of Wollongong, Wollongong, Australia","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5135781471","display_name":"Ping Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ping Yu","raw_affiliation_strings":["Centre for Digital Transformation, School of Computing and Information Technology, University of Wollongong, Wollongong, Australia. Electronic address: ping@uow.edu.au"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Digital Transformation, School of Computing and Information Technology, University of Wollongong, Wollongong, Australia. Electronic address: ping@uow.edu.au","institution_ids":["https://openalex.org/I204824540"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2980,"currency":"USD","value_usd":2980},"apc_paid":{"value":2980,"currency":"USD","value_usd":2980},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.69517161,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"180","issue":null,"first_page":"105053","last_page":"105053"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.22669999301433563,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.22669999301433563,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10350","display_name":"Electronic Health Records Systems","score":0.08760000020265579,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12790","display_name":"Nursing Diagnosis and Documentation","score":0.08229999989271164,"subfield":{"id":"https://openalex.org/subfields/2910","display_name":"Issues, ethics and legal aspects"},"field":{"id":"https://openalex.org/fields/29","display_name":"Nursing"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/health-informatics","display_name":"Health informatics","score":0.3061000108718872},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.2976999878883362},{"id":"https://openalex.org/keywords/information-system","display_name":"Information system","score":0.2736000120639801},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.24140000343322754}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5960999727249146},{"id":"https://openalex.org/C19527891","wikidata":"https://www.wikidata.org/wiki/Q1120908","display_name":"Medical physics","level":1,"score":0.3774000108242035},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33390000462532043},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3285999894142151},{"id":"https://openalex.org/C145642194","wikidata":"https://www.wikidata.org/wiki/Q870895","display_name":"Health informatics","level":3,"score":0.3061000108718872},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2676999866962433},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2628999948501587},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.24140000343322754}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1016/j.jbi.2026.105053","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.jbi.2026.105053","pdf_url":null,"source":{"id":"https://openalex.org/S11622463","display_name":"Journal of Biomedical Informatics","issn_l":"1532-0464","issn":["1532-0464","1532-0480"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Biomedical Informatics","raw_type":"journal-article"},{"id":"pmid:42114794","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/42114794","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of biomedical informatics","raw_type":null}],"best_oa_location":{"id":"doi:10.1016/j.jbi.2026.105053","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.jbi.2026.105053","pdf_url":null,"source":{"id":"https://openalex.org/S11622463","display_name":"Journal of Biomedical Informatics","issn_l":"1532-0464","issn":["1532-0464","1532-0480"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Biomedical Informatics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.49752429127693176,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W971231729","https://openalex.org/W1596418779","https://openalex.org/W1606480292","https://openalex.org/W1861747734","https://openalex.org/W2018850751","https://openalex.org/W2034849674","https://openalex.org/W2038636594","https://openalex.org/W2087388117","https://openalex.org/W2096252540","https://openalex.org/W2132314908","https://openalex.org/W2170907675","https://openalex.org/W2770445088","https://openalex.org/W2901143721","https://openalex.org/W2911489562","https://openalex.org/W2966351171","https://openalex.org/W2966867036","https://openalex.org/W2998702515","https://openalex.org/W3034383590","https://openalex.org/W3106234277","https://openalex.org/W3155807546","https://openalex.org/W3157758108","https://openalex.org/W3201174429","https://openalex.org/W4206579740","https://openalex.org/W4226118367","https://openalex.org/W4285240908","https://openalex.org/W4309674289","https://openalex.org/W4380028647","https://openalex.org/W4390545867","https://openalex.org/W4399660853","https://openalex.org/W4403487626","https://openalex.org/W4404783267","https://openalex.org/W4406288888","https://openalex.org/W4406596702","https://openalex.org/W4407766364","https://openalex.org/W4411486861","https://openalex.org/W4413838611","https://openalex.org/W7148355944"],"related_works":[],"abstract_inverted_index":{"Extracting":[0],"clinically":[1],"meaningful":[2],"information":[3,61,254,292],"from":[4,68],"free-text":[5],"notes":[6,67],"in":[7,130,221],"specific":[8],"clinical":[9,60,66,122,222,253,267,291],"settings,":[10],"such":[11],"as":[12,97],"Australian":[13,69],"aged":[14,70],"care":[15,71],"facilities,":[16,72],"remains":[17,53],"challenging":[18],"due":[19],"to":[20],"the":[21,37,48,185,202,247],"heterogeneity":[22],"of":[23,41,50,103,249,265],"text":[24],"documents,":[25],"which":[26],"lack":[27],"standardised":[28],"structure":[29],"and":[30,39,105,112,135,154,163,173,193,230,233,280,295],"terminology.":[31],"Retrieval-augmented":[32],"generation":[33],"(RAG)":[34],"can":[35],"improve":[36],"precision":[38],"grounding":[40],"large":[42],"language":[43],"model":[44],"(LLM)":[45],"outputs;":[46],"however,":[47],"choice":[49],"retrieval":[51,77,85,88,92,175,244],"strategies":[52,119,166],"understudied":[54],"despite":[55],"its":[56],"critical":[57],"importance":[58],"for":[59,252,261,289],"extraction":[62,293],"(IE).":[63],"Using":[64],"real-world":[65],"we":[73,215],"systematically":[74],"compare":[75],"six":[76],"methods":[78,200],"within":[79],"a":[80,258,286],"unified":[81],"RAG":[82,236,250],"pipeline:":[83],"sparse":[84,104,171],"(BM25),":[86],"dense":[87,91,98,106,174,210],"(bi-encoder":[89],"embeddings),":[90],"with":[93],"cross-encoder":[94],"rerank":[95],"(abbreviated":[96],"reranking),":[99],"dynamic":[100],"linear":[101],"fusion":[102,110],"scores,":[107],"reciprocal":[108],"rank":[109],"(RRF),":[111],"hybrid":[113,164],"coarse-to-fine":[114],"reranking.":[115,211],"We":[116],"evaluate":[117],"these":[118],"on":[120],"two":[121],"named":[123,223],"entity":[124,224],"recognition":[125],"tasks,":[126],"extracting":[127],"agitation":[128,180],"symptoms":[129],"dementia":[131],"(n":[132,140],"=":[133,141],"208)":[134,142],"identifying":[136],"malnutrition":[137],"risk":[138],"factors":[139],"across":[143,176],"five":[144],"dimensions:":[145],"context":[146,276],"relevance,":[147],"answer":[148],"quality,":[149],"source":[150],"faithfulness,":[151],"contextual":[152],"diversity,":[153],"item-level":[155],"accuracy.":[156],"A":[157],"repeated-measures":[158],"ANOVA":[159],"reveals":[160],"that":[161,242],"reranking":[162,183],"ensemble":[165,199],"significantly":[167],"outperform":[168],"both":[169,177],"standalone":[170],"(BM25)":[172],"tasks.":[178],"For":[179,197],"extraction,":[181],"Dense":[182],"achieves":[184],"highest":[186],"Answer":[187],"F1":[188],"(0.946),":[189],"Context":[190],"Diversity":[191],"(0.895)":[192],"Item-level":[194,204],"Accuracy":[195,205],"(0.963).":[196],"malnutrition,":[198],"yield":[201],"best":[203],"(0.944),":[206],"followed":[207],"closely":[208],"by":[209],"Through":[212],"error":[213,219],"analysis,":[214],"identify":[216],"three":[217],"LLM":[218,278],"types":[220],"recognition:":[225],"intrinsic":[226],"hallucination,":[227,229],"extrinsic":[228],"false":[231],"negatives,":[232],"elucidate":[234],"how":[235],"mitigates":[237],"each.":[238],"These":[239],"findings":[240],"demonstrate":[241],"reranking-based":[243],"substantially":[245],"enhances":[246],"performance":[248],"pipelines":[251],"extraction.":[255],"It":[256],"offers":[257],"practical":[259],"approach":[260],"improving":[262],"automated":[263],"analysis":[264],"unstructured":[266],"text.":[268],"Our":[269],"four-stage":[270],"experimental":[271],"workflow":[272],"-":[273,284],"document":[274],"indexing,":[275],"retrieval,":[277],"generation,":[279],"structured":[281],"output":[282],"formatting":[283],"provides":[285],"replicable":[287],"framework":[288],"future":[290],"research":[294],"downstream":[296],"predictive":[297],"modelling.":[298]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-10T00:00:00"}
