{"id":"https://openalex.org/W7125943757","doi":"https://doi.org/10.48550/arxiv.2601.19286","title":"ReToP: Learning to Rewrite Electronic Health Records for Clinical Prediction","display_name":"ReToP: Learning to Rewrite Electronic Health Records for Clinical Prediction","publication_year":2026,"publication_date":"2026-01-27","ids":{"openalex":"https://openalex.org/W7125943757","doi":"https://doi.org/10.48550/arxiv.2601.19286"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.19286","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.19286","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.19286","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124124370","display_name":"Jesus Lovon-Melgarejo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210119061","display_name":"Institut de Recherche en Informatique de Toulouse","ror":"https://ror.org/01rx4qw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I205747304","https://openalex.org/I205747304","https://openalex.org/I4210119061","https://openalex.org/I4387153255","https://openalex.org/I4405258862","https://openalex.org/I4405258862"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Lovon-Melgarejo, Jesus","raw_affiliation_strings":["IRIT"],"affiliations":[{"raw_affiliation_string":"IRIT","institution_ids":["https://openalex.org/I4210119061"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123957917","display_name":"Jose G. Moreno","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153551","display_name":"Institute for Radiological Image Sciences","ror":"https://ror.org/050rr3m98","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210153551"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Moreno, Jose G.","raw_affiliation_strings":["IRIT-IRIS"],"affiliations":[{"raw_affiliation_string":"IRIT-IRIS","institution_ids":["https://openalex.org/I4210153551"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021234384","display_name":"Christine Damase\u2010Michel","orcid":"https://orcid.org/0000-0001-5018-0108"},"institutions":[{"id":"https://openalex.org/I4210153551","display_name":"Institute for Radiological Image Sciences","ror":"https://ror.org/050rr3m98","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210153551"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Damase-Michel, Christine","raw_affiliation_strings":["IRIT-IRIS"],"affiliations":[{"raw_affiliation_string":"IRIT-IRIS","institution_ids":["https://openalex.org/I4210153551"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123966323","display_name":"Lynda Tamine","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153551","display_name":"Institute for Radiological Image Sciences","ror":"https://ror.org/050rr3m98","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210153551"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tamine, Lynda","raw_affiliation_strings":["IRIT-IRIS"],"affiliations":[{"raw_affiliation_string":"IRIT-IRIS","institution_ids":["https://openalex.org/I4210153551"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5124124370"],"corresponding_institution_ids":["https://openalex.org/I4210119061"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.006000000052154064,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0035000001080334187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.6554999947547913},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5450999736785889},{"id":"https://openalex.org/keywords/medical-classification","display_name":"Medical classification","score":0.5126000046730042},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5113000273704529},{"id":"https://openalex.org/keywords/health-records","display_name":"Health records","score":0.48579999804496765},{"id":"https://openalex.org/keywords/electronic-health-record","display_name":"Electronic health record","score":0.4481000006198883},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.438400000333786},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.4350999891757965},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.38670000433921814},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.37709999084472656}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7056000232696533},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.6554999947547913},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6151999831199646},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5763000249862671},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5450999736785889},{"id":"https://openalex.org/C154874363","wikidata":"https://www.wikidata.org/wiki/Q3518464","display_name":"Medical classification","level":2,"score":0.5126000046730042},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5113000273704529},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.48579999804496765},{"id":"https://openalex.org/C3020144179","wikidata":"https://www.wikidata.org/wiki/Q10871684","display_name":"Electronic health record","level":3,"score":0.4481000006198883},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.438400000333786},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.4350999891757965},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.38670000433921814},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.37709999084472656},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.36970001459121704},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.35989999771118164},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3253999948501587},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3244999945163727},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32359999418258667},{"id":"https://openalex.org/C206497026","wikidata":"https://www.wikidata.org/wiki/Q1753883","display_name":"SNOMED CT","level":3,"score":0.3215000033378601},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3111000061035156},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3093999922275543},{"id":"https://openalex.org/C195910791","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Medical record","level":2,"score":0.30219998955726624},{"id":"https://openalex.org/C145642194","wikidata":"https://www.wikidata.org/wiki/Q870895","display_name":"Health informatics","level":3,"score":0.29420000314712524},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.2858999967575073},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C2778373050","wikidata":"https://www.wikidata.org/wiki/Q1774706","display_name":"Clinical pathway","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C2779974597","wikidata":"https://www.wikidata.org/wiki/Q28448986","display_name":"Clinical Practice","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.25940001010894775},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.25929999351501465},{"id":"https://openalex.org/C163763905","wikidata":"https://www.wikidata.org/wiki/Q17075943","display_name":"Precision medicine","level":2,"score":0.25209999084472656},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.25119999051094055},{"id":"https://openalex.org/C63527458","wikidata":"https://www.wikidata.org/wiki/Q5133829","display_name":"Clinical decision support system","level":3,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.19286","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.19286","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.19286","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.19286","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7950769662857056,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Electronic":[0],"Health":[1],"Records":[2],"(EHRs)":[3],"provide":[4],"crucial":[5],"information":[6],"for":[7,39,136],"clinical":[8,17,40,110,179],"decision-making.":[9],"However,":[10],"their":[11],"high-dimensionality,":[12],"heterogeneity,":[13],"and":[14,108,194,203],"sparsity":[15],"make":[16],"prediction":[18,41,78,146],"challenging.":[19],"Large":[20],"Language":[21],"Models":[22],"(LLMs)":[23],"allowed":[24],"progress":[25],"towards":[26],"addressing":[27],"this":[28,87,98],"challenge":[29],"by":[30],"leveraging":[31],"parametric":[32],"medical":[33],"knowledge":[34],"to":[35,131,161,191],"enhance":[36,168],"EHR":[37,66,69,106,118,139,159],"data":[38],"tasks.":[42,79],"Despite":[43],"the":[44,52,59,77,115,138,143,158,184],"significant":[45],"achievements":[46],"made":[47],"so":[48],"far,":[49],"most":[50],"of":[51,104,117,186],"existing":[53],"approaches":[54],"are":[55],"fundamentally":[56],"task-agnostic":[57],"in":[58],"sense":[60],"that":[61,96,156,166],"they":[62],"deploy":[63],"LLMs":[64],"as":[65],"encoders":[67],"or":[68],"completion":[70],"modules":[71],"without":[72],"fully":[73],"integrating":[74],"signals":[75],"from":[76],"This":[80],"naturally":[81],"hinders":[82],"task":[83],"performance":[84],"accuracy.":[85],"In":[86],"work,":[88],"we":[89,122],"propose":[90],"Rewrite-To-Predict":[91],"(ReToP),":[92],"an":[93,101,105],"LLM-based":[94],"framework":[95,172],"addresses":[97],"limitation":[99],"through":[100],"end-to-end":[102],"training":[103,120],"rewriter":[107,144,160],"a":[109,149],"predictor.":[111],"To":[112],"cope":[113],"with":[114,145,196],"lack":[116],"rewrite":[119],"data,":[121],"generate":[123,162],"synthetic":[124],"pseudo-labels":[125],"using":[126,148],"clinical-driven":[127],"feature":[128],"selection":[129],"strategies":[130],"create":[132],"diverse":[133],"patient":[134],"rewrites":[135,165,202],"fine-tuning":[137,198],"rewriter.":[140],"ReToP":[141,171,187],"aligns":[142],"objectives":[147],"novel":[150],"Classifier":[151],"Supervised":[152],"Contribution":[153],"(CSC)":[154],"score":[155],"enables":[157],"clinically":[163],"relevant":[164],"directly":[167],"prediction.":[169],"Our":[170],"surpasses":[173],"strong":[174],"baseline":[175],"models":[176],"across":[177],"three":[178],"tasks":[180,195],"on":[181],"MIMIC-IV.":[182],"Moreover,":[183],"analysis":[185],"shows":[188],"its":[189],"generalizability":[190],"unseen":[192],"datasets":[193],"minimal":[197],"while":[199],"preserving":[200],"faithful":[201],"emphasizing":[204],"task-relevant":[205],"predictive":[206],"features.":[207]},"counts_by_year":[],"updated_date":"2026-01-29T23:17:01.242718","created_date":"2026-01-29T00:00:00"}
