{"id":"https://openalex.org/W4412888093","doi":"https://doi.org/10.18653/v1/2025.findings-acl.775","title":"The Effectiveness of Uncased Tokeniziaion for Clinical Notes","display_name":"The Effectiveness of Uncased Tokeniziaion for Clinical Notes","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412888093","doi":"https://doi.org/10.18653/v1/2025.findings-acl.775"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.775","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.775","pdf_url":"https://aclanthology.org/2025.findings-acl.775.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.775.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119181328","display_name":"Cory Paik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cory Paik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5119181329","display_name":"Katharina Von Der Wense","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Katharina Von Der Wense","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15227976,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"14986","last_page":"14992"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.6416000127792358,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.6416000127792358,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.5996000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11744","display_name":"Health Sciences Research and Education","score":0.5695000290870667,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5350400805473328}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5350400805473328}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.775","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.775","pdf_url":"https://aclanthology.org/2025.findings-acl.775.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.775","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.775","pdf_url":"https://aclanthology.org/2025.findings-acl.775.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.47999998927116394,"id":"https://metadata.un.org/sdg/2","display_name":"Zero hunger"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412888093.pdf","grobid_xml":"https://content.openalex.org/works/W4412888093.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"impact":[1],"of":[2,99],"case-sensitive":[3,108,127],"tokenization":[4,54,74],"on":[5,102,106,126],"clinical":[6,12,86,103],"notes":[7,13],"is":[8,41,79],"not":[9],"well":[10],"understood.While":[11],"share":[14],"similarities":[15],"with":[16],"biomedical":[17,84],"text":[18],"in":[19,28,83],"terminology,":[20],"they":[21],"often":[22],"lack":[23],"the":[24,97],"proper":[25],"casing":[26,49],"found":[27],"polished":[29],"publications.Language":[30],"models,":[31],"unlike":[32],"humans,":[33],"require":[34],"a":[35,42],"fixed":[36],"vocabulary":[37],"and":[38,55,62,85,115,128],"case":[39,119],"sensitivity":[40],"tradeoff":[43],"that":[44,93],"must":[45],"be":[46],"considered":[47],"carefully.Improper":[48],"can":[50],"lead":[51],"to":[52,121],"sub-optimal":[53],"increased":[56],"sequence":[57],"length,":[58],"degrading":[59],"downstream":[60],"performance":[61,98,125],"increasing":[63],"computational":[64],"costs.While":[65],"most":[66],"recent":[67],"opendomain":[68],"encoder":[69],"language":[70],"models":[71,95,101],"use":[72],"uncased":[73,94],"for":[75],"all":[76],"tasks,":[77],"there":[78],"no":[80],"clear":[81],"trend":[82],"models.In":[87],"this":[88],"work":[89],"we":[90],"(1)":[91],"show":[92],"exceed":[96],"cased":[100],"notes,":[104],"even":[105],"traditionally":[107],"tasks":[109],"such":[110],"as":[111],"named":[112],"entity":[113],"recognition":[114],"(2)":[116],"introduce":[117],"independent":[118],"encoding":[120],"better":[122],"balance":[123],"model":[124],"improperly-cased":[129],"tasks.":[130]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
