{"id":"https://openalex.org/W4361019623","doi":"https://doi.org/10.48550/arxiv.2303.13722","title":"Natural language processing to automatically extract the presence and severity of esophagitis in notes of patients undergoing radiotherapy","display_name":"Natural language processing to automatically extract the presence and severity of esophagitis in notes of patients undergoing radiotherapy","publication_year":2023,"publication_date":"2023-03-24","ids":{"openalex":"https://openalex.org/W4361019623","doi":"https://doi.org/10.48550/arxiv.2303.13722"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2303.13722","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.13722","pdf_url":"https://arxiv.org/pdf/2303.13722","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2303.13722","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100369744","display_name":"Shan Chen","orcid":"https://orcid.org/0000-0003-0912-7254"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Shan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054676618","display_name":"Marco Guevara","orcid":"https://orcid.org/0000-0003-2599-3312"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guevara, Marco","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101762536","display_name":"Nicol\u00e1s David Ram\u00edrez","orcid":"https://orcid.org/0000-0001-9762-5637"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramirez, Nicolas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071415610","display_name":"Arpi Murray","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Murray, Arpi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044194506","display_name":"Jeremy L. Warner","orcid":"https://orcid.org/0000-0002-2851-7242"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Warner, Jeremy L.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001452511","display_name":"Hugo J.W.L. Aerts","orcid":"https://orcid.org/0000-0002-2122-2003"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aerts, Hugo JWL","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102001875","display_name":"Timothy A. Miller","orcid":"https://orcid.org/0000-0003-4513-403X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miller, Timothy A.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087865794","display_name":"Guergana Savova","orcid":"https://orcid.org/0000-0002-5887-200X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Savova, Guergana K.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064724289","display_name":"Raymond H. Mak","orcid":"https://orcid.org/0000-0002-8754-0565"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mak, Raymond H.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5039369605","display_name":"Danielle S. Bitterman","orcid":"https://orcid.org/0000-0003-0345-2232"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bitterman, Danielle S.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5100369744"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/esophagitis","display_name":"Esophagitis","score":0.7866008877754211},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6631237268447876},{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.5992902517318726},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.5596204996109009},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4796772003173828},{"id":"https://openalex.org/keywords/transferability","display_name":"Transferability","score":0.44069138169288635},{"id":"https://openalex.org/keywords/toxicity","display_name":"Toxicity","score":0.42181092500686646},{"id":"https://openalex.org/keywords/internal-medicine","display_name":"Internal medicine","score":0.41939598321914673},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3588152527809143},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.20527973771095276},{"id":"https://openalex.org/keywords/reflux","display_name":"Reflux","score":0.12011668086051941}],"concepts":[{"id":"https://openalex.org/C2779920096","wikidata":"https://www.wikidata.org/wiki/Q298230","display_name":"Esophagitis","level":4,"score":0.7866008877754211},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6631237268447876},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.5992902517318726},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.5596204996109009},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4796772003173828},{"id":"https://openalex.org/C61272859","wikidata":"https://www.wikidata.org/wiki/Q7834031","display_name":"Transferability","level":3,"score":0.44069138169288635},{"id":"https://openalex.org/C29730261","wikidata":"https://www.wikidata.org/wiki/Q274160","display_name":"Toxicity","level":2,"score":0.42181092500686646},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.41939598321914673},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3588152527809143},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.20527973771095276},{"id":"https://openalex.org/C43270747","wikidata":"https://www.wikidata.org/wiki/Q898610","display_name":"Reflux","level":3,"score":0.12011668086051941},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2779134260","wikidata":"https://www.wikidata.org/wiki/Q12136","display_name":"Disease","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C140331021","wikidata":"https://www.wikidata.org/wiki/Q1868104","display_name":"Logit","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2303.13722","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.13722","pdf_url":"https://arxiv.org/pdf/2303.13722","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2303.13722","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2303.13722","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2303.13722","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.13722","pdf_url":"https://arxiv.org/pdf/2303.13722","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.8399999737739563,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4361019623.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2161221533","https://openalex.org/W4229699405","https://openalex.org/W1666484574","https://openalex.org/W2216382288","https://openalex.org/W2355491300","https://openalex.org/W4234629551","https://openalex.org/W2030816003","https://openalex.org/W2011110943","https://openalex.org/W2028856635","https://openalex.org/W2011433332"],"abstract_inverted_index":{"Radiotherapy":[0],"(RT)":[1],"toxicities":[2],"can":[3],"impair":[4],"survival":[5],"and":[6,41,56,76,112,118,159,165],"quality-of-life,":[7],"yet":[8],"remain":[9],"under-studied.":[10],"Real-world":[11],"evidence":[12],"holds":[13],"potential":[14],"to":[15,37,179,186],"improve":[16],"our":[17,172],"understanding":[18],"of":[19,43,47,68],"toxicities,":[20],"but":[21],"toxicity":[22,183,201],"information":[23],"is":[24,175],"often":[25],"only":[26],"in":[27,203],"clinical":[28],"notes.":[29,191],"We":[30,53],"developed":[31],"natural":[32],"language":[33],"processing":[34],"(NLP)":[35],"models":[36,59],"identify":[38],"the":[39,103,122,140,149,153,176],"presence":[40,67],"severity":[42,184],"esophagitis":[44,62,73,80,182],"from":[45,93,189],"notes":[46,92],"patients":[48,94],"treated":[49],"with":[50,95],"thoracic":[51],"RT.":[52,99],"fine-tuned":[54],"statistical":[55],"pre-trained":[57],"BERT-based":[58],"for":[60,114,134,161,197],"three":[61],"classification":[63],"tasks:":[64],"Task":[65,70,77,115,162],"1)":[66],"esophagitis,":[69],"2)":[71],"severe":[72],"or":[74],"not,":[75],"3)":[78],"no":[79],"vs.":[81,84],"grade":[82,85],"1":[83],"2-3.":[86],"Transferability":[87],"was":[88,109,156],"tested":[89],"on":[90],"345":[91],"esophageal":[96,150],"cancer":[97,151],"undergoing":[98],"Fine-tuning":[100],"PubmedBERT":[101],"yielded":[102],"best":[104,107,154],"performance.":[105],"The":[106,192],"macro-F1":[108,130,141,155],"0.92,":[110],"0.82,":[111],"0.74":[113],"1,":[116,163],"2,":[117,164],"3,":[119,166],"respectively.":[120],"Selecting":[121],"most":[123],"informative":[124],"note":[125],"sections":[126],"during":[127],"fine-tuning":[128],"improved":[129,139],"by":[131,142],"over":[132,143],"2%":[133],"all":[135,146],"tasks.":[136,147],"Silver-labeled":[137],"data":[138],"3%":[144],"across":[145],"For":[148],"notes,":[152],"0.73,":[157],"0.74,":[158],"0.65":[160],"respectively,":[167],"without":[168],"additional":[169],"fine-tuning.":[170],"To":[171],"knowledge,":[173],"this":[174],"first":[177],"effort":[178],"automatically":[180],"extract":[181],"according":[185],"CTCAE":[187],"guidelines":[188],"clinic":[190],"promising":[193],"performance":[194],"provides":[195],"proof-of-concept":[196],"NLP-based":[198],"automated":[199],"detailed":[200],"monitoring":[202],"expanded":[204],"domains.":[205]},"counts_by_year":[],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
