{"id":"https://openalex.org/W7130577463","doi":"https://doi.org/10.48550/arxiv.2602.15852","title":"Building Safe and Deployable Clinical Natural Language Processing under Temporal Leakage Constraints","display_name":"Building Safe and Deployable Clinical Natural Language Processing under Temporal Leakage Constraints","publication_year":2026,"publication_date":"2026-01-24","ids":{"openalex":"https://openalex.org/W7130577463","doi":"https://doi.org/10.48550/arxiv.2602.15852"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.15852","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123956726","display_name":"Ha Na Cho","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cho, Ha Na","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119101223","display_name":"Sairam Sutari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sutari, Sairam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126414081","display_name":"Alexander Lopez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lopez, Alexander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108938746","display_name":"Hansen Bow","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bow, Hansen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126395953","display_name":"Kai Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Kai","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5123956726"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.6388000249862671,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.6388000249862671,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.15889999270439148,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.04190000146627426,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7788000106811523},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6891000270843506},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5708000063896179},{"id":"https://openalex.org/keywords/audit","display_name":"Audit","score":0.5174999833106995},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.4514000117778778},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.42980000376701355},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.39329999685287476},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.36500000953674316},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.35370001196861267}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7788000106811523},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6891000270843506},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.675599992275238},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5708000063896179},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.5174999833106995},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5166000127792358},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.4514000117778778},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.42980000376701355},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4041000008583069},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.39329999685287476},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38609999418258667},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.35370001196861267},{"id":"https://openalex.org/C194541083","wikidata":"https://www.wikidata.org/wiki/Q457174","display_name":"Workaround","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.3312999904155731},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.3237999975681305},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.3221000134944916},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.298799991607666},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C2777042071","wikidata":"https://www.wikidata.org/wiki/Q6509304","display_name":"Leakage (economics)","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C8505890","wikidata":"https://www.wikidata.org/wiki/Q605095","display_name":"Budget constraint","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2596000134944916}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.15852","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.15852","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15852","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.15852","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Clinical":[0],"natural":[1],"language":[2],"processing":[3],"(NLP)":[4],"models":[5,21,134],"have":[6],"shown":[7],"promise":[8],"for":[9,47],"supporting":[10],"hospital":[11],"discharge":[12,109],"planning":[13],"by":[14],"leveraging":[15],"narrative":[16],"clinical":[17,35,58,77,154],"documentation.":[18],"However,":[19],"note-based":[20],"are":[22],"particularly":[23],"vulnerable":[24],"to":[25,72,97,104],"temporal":[26,80,159],"and":[27,37,60,75,99,127,138,162],"lexical":[28,147],"leakage,":[29],"where":[30,50],"documentation":[31],"artifacts":[32],"encode":[33],"future":[34],"decisions":[36],"inflate":[38],"apparent":[39],"predictive":[40,124],"performance.":[41,167],"Such":[42],"behavior":[43],"poses":[44],"substantial":[45],"risks":[46],"real-world":[48],"deployment,":[49],"overconfident":[51],"or":[52],"temporally":[53],"invalid":[54],"predictions":[55],"can":[56],"disrupt":[57],"workflows":[59],"compromise":[61],"patient":[62],"safety.":[63],"This":[64],"study":[65],"focuses":[66],"on":[67,145],"system-level":[68],"design":[69],"choices":[70],"required":[71],"build":[73],"safe":[74],"deployable":[76],"NLP":[78,155],"under":[79],"leakage":[81],"constraints.":[82],"We":[83],"present":[84],"a":[85,116],"lightweight":[86],"auditing":[87,122],"pipeline":[88],"that":[89,132,152],"integrates":[90],"interpretability":[91],"into":[92],"the":[93],"model":[94],"development":[95],"process":[96],"identify":[98],"suppress":[100],"leakage-prone":[101],"signals":[102],"prior":[103],"final":[105],"training.":[106],"Using":[107],"next-day":[108],"prediction":[110],"after":[111],"elective":[112],"spine":[113],"surgery":[114],"as":[115],"case":[117],"study,":[118],"we":[119],"evaluate":[120],"how":[121],"affects":[123],"behavior,":[125],"calibration,":[126,161],"safety-relevant":[128],"trade-offs.":[129],"Results":[130],"show":[131],"audited":[133],"exhibit":[135],"more":[136],"conservative":[137],"better-calibrated":[139],"probability":[140],"estimates,":[141],"with":[142],"reduced":[143],"reliance":[144],"discharge-related":[146],"cues.":[148],"These":[149],"findings":[150],"emphasize":[151],"deployment-ready":[153],"systems":[156],"should":[157],"prioritize":[158],"validity,":[160],"behavioral":[163],"robustness":[164],"over":[165],"optimistic":[166]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-20T00:00:00"}
