{"id":"https://openalex.org/W4410773778","doi":"https://doi.org/10.3390/info16060446","title":"Preprocessing of Physician Notes by LLMs Improves Clinical Concept Extraction Without Information Loss","display_name":"Preprocessing of Physician Notes by LLMs Improves Clinical Concept Extraction Without Information Loss","publication_year":2025,"publication_date":"2025-05-27","ids":{"openalex":"https://openalex.org/W4410773778","doi":"https://doi.org/10.3390/info16060446"},"language":"en","primary_location":{"id":"doi:10.3390/info16060446","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16060446","pdf_url":"https://www.mdpi.com/2078-2489/16/6/446/pdf?version=1748332313","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/16/6/446/pdf?version=1748332313","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046843425","display_name":"Daniel B. Hier","orcid":"https://orcid.org/0000-0002-6179-0793"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daniel B. Hier","raw_affiliation_strings":["Department of Neurology & Rehabilitation, University of Illinois at Chicago, Chicago, IL 60612, USA"],"raw_orcid":"https://orcid.org/0000-0002-6179-0793","affiliations":[{"raw_affiliation_string":"Department of Neurology & Rehabilitation, University of Illinois at Chicago, Chicago, IL 60612, USA","institution_ids":["https://openalex.org/I39422238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041109604","display_name":"Michael D. Carrithers","orcid":"https://orcid.org/0000-0001-5337-0804"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael A. Carrithers","raw_affiliation_strings":["Department of Neurology & Rehabilitation, University of Illinois at Chicago, Chicago, IL 60612, USA"],"raw_orcid":"https://orcid.org/0000-0001-5337-0804","affiliations":[{"raw_affiliation_string":"Department of Neurology & Rehabilitation, University of Illinois at Chicago, Chicago, IL 60612, USA","institution_ids":["https://openalex.org/I39422238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083683344","display_name":"Steven Keith Platt","orcid":"https://orcid.org/0000-0001-8270-1932"},"institutions":[{"id":"https://openalex.org/I1925986","display_name":"Loyola University Chicago","ror":"https://ror.org/04b6x2g63","country_code":"US","type":"education","lineage":["https://openalex.org/I1925986"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steven K. Platt","raw_affiliation_strings":["Laboratory for Applied Artificial Intelligence, Loyola University Chicago, Chicago, IL 60611, USA"],"raw_orcid":"https://orcid.org/0000-0001-8270-1932","affiliations":[{"raw_affiliation_string":"Laboratory for Applied Artificial Intelligence, Loyola University Chicago, Chicago, IL 60611, USA","institution_ids":["https://openalex.org/I1925986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103716509","display_name":"Anh Quynh Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I1925986","display_name":"Loyola University Chicago","ror":"https://ror.org/04b6x2g63","country_code":"US","type":"education","lineage":["https://openalex.org/I1925986"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anh Nguyen","raw_affiliation_strings":["Laboratory for Applied Artificial Intelligence, Loyola University Chicago, Chicago, IL 60611, USA"],"raw_orcid":"https://orcid.org/0009-0001-8836-7944","affiliations":[{"raw_affiliation_string":"Laboratory for Applied Artificial Intelligence, Loyola University Chicago, Chicago, IL 60611, USA","institution_ids":["https://openalex.org/I1925986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005972266","display_name":"Ioannis Giannopoulos","orcid":"https://orcid.org/0000-0002-2556-5230"},"institutions":[{"id":"https://openalex.org/I1925986","display_name":"Loyola University Chicago","ror":"https://ror.org/04b6x2g63","country_code":"US","type":"education","lineage":["https://openalex.org/I1925986"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ioannis Giannopoulos","raw_affiliation_strings":["Laboratory for Applied Artificial Intelligence, Loyola University Chicago, Chicago, IL 60611, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratory for Applied Artificial Intelligence, Loyola University Chicago, Chicago, IL 60611, USA","institution_ids":["https://openalex.org/I1925986"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040578359","display_name":"Tayo Obafemi-Ajayi","orcid":"https://orcid.org/0000-0002-0155-9733"},"institutions":[{"id":"https://openalex.org/I119942284","display_name":"Missouri State University","ror":"https://ror.org/01d2sez20","country_code":"US","type":"education","lineage":["https://openalex.org/I119942284"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tayo Obafemi-Ajayi","raw_affiliation_strings":["Engineering Program, Missouri State University, Springfield, MO 65897, USA"],"raw_orcid":"https://orcid.org/0000-0002-0155-9733","affiliations":[{"raw_affiliation_string":"Engineering Program, Missouri State University, Springfield, MO 65897, USA","institution_ids":["https://openalex.org/I119942284"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5046843425"],"corresponding_institution_ids":["https://openalex.org/I39422238"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":2.2177,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.87386472,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"16","issue":"6","first_page":"446","last_page":"446"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10350","display_name":"Electronic Health Records Systems","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6317338347434998},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.5108384490013123},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3726884722709656},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.22857362031936646},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.08448395133018494}],"concepts":[{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6317338347434998},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.5108384490013123},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3726884722709656},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.22857362031936646},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.08448395133018494},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/info16060446","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16060446","pdf_url":"https://www.mdpi.com/2078-2489/16/6/446/pdf?version=1748332313","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:bearworks.missouristate.edu:articles00-1141","is_oa":false,"landing_page_url":"https://bearworks.missouristate.edu/articles00/142","pdf_url":null,"source":{"id":"https://openalex.org/S4377196450","display_name":"BearWorks (Missouri State University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I119942284","host_organization_name":"Missouri State University","host_organization_lineage":["https://openalex.org/I119942284"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Faculty Scholarship","raw_type":"text"},{"id":"pmh:oai:doaj.org/article:11fba727df524a25be7df1ba2a00e165","is_oa":true,"landing_page_url":"https://doaj.org/article/11fba727df524a25be7df1ba2a00e165","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 16, Iss 6, p 446 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/info16060446","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info16060446","pdf_url":"https://www.mdpi.com/2078-2489/16/6/446/pdf?version=1748332313","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6064844482","display_name":null,"funder_award_id":"2423235","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410773778.pdf","grobid_xml":"https://content.openalex.org/works/W4410773778.grobid-xml"},"referenced_works_count":79,"referenced_works":["https://openalex.org/W1481389199","https://openalex.org/W1919152067","https://openalex.org/W1931861466","https://openalex.org/W1982513331","https://openalex.org/W1987811240","https://openalex.org/W1989708585","https://openalex.org/W1997927489","https://openalex.org/W2006642610","https://openalex.org/W2007530867","https://openalex.org/W2013807743","https://openalex.org/W2058985021","https://openalex.org/W2059459859","https://openalex.org/W2093274439","https://openalex.org/W2094101228","https://openalex.org/W2097466502","https://openalex.org/W2165748167","https://openalex.org/W2168091868","https://openalex.org/W2406610483","https://openalex.org/W2407634150","https://openalex.org/W2508212183","https://openalex.org/W2557385283","https://openalex.org/W2588904016","https://openalex.org/W2589829394","https://openalex.org/W2766635664","https://openalex.org/W2781002470","https://openalex.org/W2789414390","https://openalex.org/W2802884616","https://openalex.org/W2805089815","https://openalex.org/W2893443580","https://openalex.org/W2910918344","https://openalex.org/W2911675406","https://openalex.org/W2920129366","https://openalex.org/W2946102094","https://openalex.org/W2951259561","https://openalex.org/W2967690619","https://openalex.org/W2981115805","https://openalex.org/W2984200234","https://openalex.org/W3095952990","https://openalex.org/W3106789863","https://openalex.org/W3174901847","https://openalex.org/W3181361218","https://openalex.org/W4206698485","https://openalex.org/W4214536609","https://openalex.org/W4225764431","https://openalex.org/W4244752586","https://openalex.org/W4283449551","https://openalex.org/W4285315819","https://openalex.org/W4310266205","https://openalex.org/W4328096800","https://openalex.org/W4366350958","https://openalex.org/W4379769651","https://openalex.org/W4379985031","https://openalex.org/W4382930013","https://openalex.org/W4385264896","https://openalex.org/W4386120650","https://openalex.org/W4386794639","https://openalex.org/W4386973901","https://openalex.org/W4387500346","https://openalex.org/W4387861191","https://openalex.org/W4388314304","https://openalex.org/W4391301614","https://openalex.org/W4392477433","https://openalex.org/W4394755432","https://openalex.org/W4394782455","https://openalex.org/W4394879936","https://openalex.org/W4395067526","https://openalex.org/W4399994870","https://openalex.org/W4400052994","https://openalex.org/W4401993703","https://openalex.org/W4405492031","https://openalex.org/W4406431845","https://openalex.org/W4407695342","https://openalex.org/W4410019803","https://openalex.org/W6766960179","https://openalex.org/W6786365459","https://openalex.org/W6839179172","https://openalex.org/W6855104011","https://openalex.org/W6863892538","https://openalex.org/W6864537853"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Clinician":[0],"notes":[1,47,100,125],"are":[2],"a":[3,66,182],"rich":[4],"source":[5],"of":[6,39,73,97,169,181,185],"patient":[7,33],"information,":[8],"but":[9],"often":[10],"contain":[11],"inconsistencies":[12,27],"due":[13],"to":[14,70,119,140],"varied":[15],"writing":[16],"styles,":[17],"abbreviations,":[18],"medical":[19],"jargon,":[20],"grammatical":[21,82],"errors,":[22,83],"and":[23,35,62,81,86,89,127],"non-standard":[24],"formatting.":[25],"These":[26],"hinder":[28],"their":[29,190],"direct":[30],"use":[31],"in":[32,172],"care":[34],"degrade":[36],"the":[37,71,124,167,179],"performance":[38,180],"downstream":[40,110,186],"computational":[41],"applications":[42,187],"that":[43,102,145,153,162,188],"rely":[44],"on":[45],"these":[46],"as":[48,51],"input,":[49],"such":[50],"quality":[52,165],"improvement,":[53],"population":[54],"health":[55],"analytics,":[56],"precision":[57],"medicine,":[58],"clinical":[59,93,173,194],"decision":[60],"support,":[61],"research.":[63],"We":[64,151],"present":[65],"large-language-model":[67],"(LLM)":[68],"approach":[69,176],"preprocessing":[72,155],"1618":[74],"neurology":[75],"notes.":[76,174],"The":[77],"LLM":[78],"corrected":[79],"spelling":[80],"expanded":[84],"acronyms,":[85],"standardized":[87],"terminology":[88],"formatting,":[90],"without":[91],"altering":[92],"content.":[94],"Expert":[95],"review":[96],"randomly":[98],"sampled":[99],"confirmed":[101],"no":[103],"significant":[104],"information":[105],"was":[106],"lost.":[107],"To":[108],"evaluate":[109],"impact,":[111],"we":[112],"applied":[113],"an":[114,157],"ontology-based":[115],"NLP":[116],"pipeline":[117],"(Doc2Hpo)":[118],"extract":[120],"biomedical":[121],"concepts":[122],"from":[123,138,192],"before":[126],"after":[128],"editing.":[129],"F1":[130],"scores":[131],"for":[132],"Human":[133],"Phenotype":[134],"Ontology":[135],"extraction":[136],"improved":[137],"0.40":[139],"0.61,":[141],"confirming":[142],"our":[143],"hypothesis":[144],"better":[146,149],"inputs":[147],"yielded":[148],"outputs.":[150],"conclude":[152],"LLM-based":[154],"is":[156],"effective":[158],"error":[159],"correction":[160],"strategy":[161],"improves":[163],"data":[164],"at":[166],"level":[168],"free":[170],"text":[171],"This":[175],"may":[177],"enhance":[178],"broad":[183],"class":[184],"derive":[189],"input":[191],"unstructured":[193],"documentation.":[195]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-19T17:40:00.097472","created_date":"2025-10-10T00:00:00"}
