{"id":"https://openalex.org/W7138035680","doi":"https://doi.org/10.48550/arxiv.2603.15270","title":"From Documents to Spans: Scalable Supervision for Evidence-Based ICD Coding with LLMs","display_name":"From Documents to Spans: Scalable Supervision for Evidence-Based ICD Coding with LLMs","publication_year":2026,"publication_date":"2026-03-16","ids":{"openalex":"https://openalex.org/W7138035680","doi":"https://doi.org/10.48550/arxiv.2603.15270"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.15270","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15270","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.15270","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129742590","display_name":"Xu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Xu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109538291","display_name":"Wenxin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Wenxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129704608","display_name":"Chenxu Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Chenxu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129666080","display_name":"Rongsheng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Rongsheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129692894","display_name":"Kun Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Zhiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129687121","display_name":"S. Kevin Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Xiaodong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhang, Kun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Kun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Zhou, S. Kevin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, S. Kevin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5129742590"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.267300009727478,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.267300009727478,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.11110000312328339,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.054999999701976776,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.9269999861717224},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6983000040054321},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.6700000166893005},{"id":"https://openalex.org/keywords/icd-10","display_name":"ICD-10","score":0.4553000032901764},{"id":"https://openalex.org/keywords/medical-classification","display_name":"Medical classification","score":0.39809998869895935},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.39160001277923584},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3862999975681305},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.3605000078678131}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.9269999861717224},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6983000040054321},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.6700000166893005},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.651199996471405},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5791000127792358},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5504999756813049},{"id":"https://openalex.org/C2781116378","wikidata":"https://www.wikidata.org/wiki/Q45127","display_name":"ICD-10","level":2,"score":0.4553000032901764},{"id":"https://openalex.org/C154874363","wikidata":"https://www.wikidata.org/wiki/Q3518464","display_name":"Medical classification","level":2,"score":0.39809998869895935},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.39160001277923584},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3862999975681305},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3605000078678131},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.334199994802475},{"id":"https://openalex.org/C77637269","wikidata":"https://www.wikidata.org/wiki/Q7002051","display_name":"Neural coding","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C45827449","wikidata":"https://www.wikidata.org/wiki/Q5270338","display_name":"Diagnosis code","level":3,"score":0.3167000114917755},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28130000829696655},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.27399998903274536},{"id":"https://openalex.org/C2776809875","wikidata":"https://www.wikidata.org/wiki/Q1375963","display_name":"Converse","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.26589998602867126},{"id":"https://openalex.org/C138816342","wikidata":"https://www.wikidata.org/wiki/Q189603","display_name":"Public health","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.15270","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15270","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.15270","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15270","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7252090573310852,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"International":[0],"Classification":[1],"of":[2,124,140,179,183],"Diseases":[3],"(ICD)":[4],"coding":[5,22,104],"assigns":[6],"diagnosis":[7],"codes":[8],"to":[9,48,84,114,127,144,149],"clinical":[10,19,116],"documents":[11,126],"and":[12,18,80,110,132,156,186,198],"is":[13,62],"essential":[14],"for":[15,66,191],"healthcare":[16],"billing":[17],"analysis.":[20],"Reliable":[21],"requires":[23],"that":[24,57,101],"each":[25,192],"predicted":[26,193],"code":[27,40,133],"be":[28,158],"supported":[29],"by":[30],"explicit":[31,188],"textual":[32],"evidence.":[33],"However,":[34],"existing":[35],"public":[36],"datasets":[37],"provide":[38],"only":[39,177],"labels,":[41],"without":[42],"evidence":[43,60,75,129,142,190],"annotations,":[44],"limiting":[45],"models'":[46],"ability":[47,105],"learn":[49,73],"evidence-grounded":[50],"predictions.":[51],"In":[52],"this":[53,91,112],"work,":[54],"we":[55,93,119],"argue":[56],"dense,":[58],"document-level":[59,86],"annotation":[61],"not":[63],"always":[64],"necessary":[65],"learning":[67],"evidence-based":[68,87],"coding.":[69,88],"Instead,":[70],"models":[71],"can":[72,157],"code-specific":[74],"patterns":[76,83],"from":[77],"local":[78],"spans":[79,143],"use":[81,120],"these":[82],"support":[85],"Based":[89],"on":[90],"insight,":[92],"propose":[94],"Span-Centric":[95],"Learning":[96],"(SCL),":[97],"a":[98,121,137],"training":[99,181],"framework":[100],"strengthens":[102],"LLMs'":[103],"at":[106,176],"the":[107,164,180],"span":[108,152],"level":[109],"transfers":[111],"capability":[113],"full":[115],"documents.":[117],"Specifically,":[118],"small":[122],"set":[123],"annotated":[125],"supervise":[128],"recognition,":[130],"aggregation,":[131],"assignment,":[134],"while":[135],"leveraging":[136],"large":[138],"collection":[139],"lightweight":[141],"reinforce":[145],"span-level":[146],"reasoning.":[147],"Due":[148],"their":[150],"compactness,":[151],"annotations":[153],"are":[154],"scalable":[155],"further":[159],"augmented":[160],"through":[161],"synthesis.":[162],"Under":[163],"same":[165],"Llama3.1-8B":[166],"backbone,":[167],"our":[168],"approach":[169],"achieves":[170],"an":[171],"8.2-point":[172],"improvement":[173],"in":[174],"macro-F1":[175],"20%":[178],"cost":[182],"standard":[184],"SFT,":[185],"provides":[187],"supporting":[189],"code,":[194],"enabling":[195],"human":[196],"auditing":[197],"revision.":[199]},"counts_by_year":[],"updated_date":"2026-05-09T06:09:20.037420","created_date":"2026-03-18T00:00:00"}
