{"id":"https://openalex.org/W7162026352","doi":"https://doi.org/10.48550/arxiv.2605.21154","title":"Automated ICD Classification of Psychiatric Diagnoses: From Classical NLP to Large Language Models","display_name":"Automated ICD Classification of Psychiatric Diagnoses: From Classical NLP to Large Language Models","publication_year":2026,"publication_date":"2026-05-20","ids":{"openalex":"https://openalex.org/W7162026352","doi":"https://doi.org/10.48550/arxiv.2605.21154"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.21154","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21154","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.21154","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136636865","display_name":"Fernando Ortega","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ortega, Fernando","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136654611","display_name":"Ra\u00fal Lara-Cabrera","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lara-Cabrera, Ra\u00fal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072641011","display_name":"Jorge Due\u00f1as-Ler\u00edn","orcid":"https://orcid.org/0000-0002-4685-519X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Due\u00f1as-Ler\u00edn, Jorge","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136669379","display_name":"Alejandro de la Torre-Luque","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"de la Torre-Luque, Alejandro","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065674383","display_name":"M Robert","orcid":"https://orcid.org/0000-0003-0646-2997"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Robert, Merc\u00e9 Salvador","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136622560","display_name":"Enrique Baca-Garc\u00eda","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baca-Garc\u00eda, Enrique","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.5616999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.5616999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.21080000698566437,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12308","display_name":"Mental Health and Psychiatry","score":0.025800000876188278,"subfield":{"id":"https://openalex.org/subfields/1211","display_name":"Philosophy"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.7626000046730042},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.49720001220703125},{"id":"https://openalex.org/keywords/unified-medical-language-system","display_name":"Unified Medical Language System","score":0.4797999858856201},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.4043000042438507},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4002000093460083},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.39719998836517334},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3880000114440918},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.3763999938964844}],"concepts":[{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.7626000046730042},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6873000264167786},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.616100013256073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6151000261306763},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.49720001220703125},{"id":"https://openalex.org/C69505689","wikidata":"https://www.wikidata.org/wiki/Q455338","display_name":"Unified Medical Language System","level":2,"score":0.4797999858856201},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.4043000042438507},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4002000093460083},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.39719998836517334},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3880000114440918},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.3763999938964844},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37439998984336853},{"id":"https://openalex.org/C154874363","wikidata":"https://www.wikidata.org/wiki/Q3518464","display_name":"Medical classification","level":2,"score":0.3662000000476837},{"id":"https://openalex.org/C534262118","wikidata":"https://www.wikidata.org/wiki/Q177719","display_name":"Medical diagnosis","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C2777102477","wikidata":"https://www.wikidata.org/wiki/Q10469820","display_name":"Language disorder","level":3,"score":0.34389999508857727},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2818000018596649},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.27410000562667847},{"id":"https://openalex.org/C2994403141","wikidata":"https://www.wikidata.org/wiki/Q2976573","display_name":"Psychiatric diagnosis","level":3,"score":0.27140000462532043},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.27129998803138733},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C134362201","wikidata":"https://www.wikidata.org/wiki/Q317309","display_name":"Mental health","level":2,"score":0.2529999911785126}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.21154","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21154","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.21154","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21154","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7562835812568665,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Mental":[0],"health":[1],"has":[2],"become":[3],"a":[4,9,50,112],"global":[5],"priority,":[6],"leading":[7],"to":[8,32,71,123],"massive":[10],"administrative":[11],"burden":[12],"in":[13],"the":[14,22,33,108,131,138],"coding":[15],"of":[16,24,36,53,115,133,141],"clinical":[17,125],"diagnoses.":[18],"This":[19,117],"study":[20],"proposes":[21],"automation":[23],"psychiatric":[25,56,142],"diagnostic":[26],"analysis":[27],"by":[28,92],"mapping":[29],"free-text":[30],"descriptions":[31],"International":[34],"Classification":[35],"Diseases":[37],"(ICD)":[38],"using":[39],"Natural":[40],"Language":[41,74],"Processing":[42],"(NLP)":[43],"and":[44,81,97,137],"Machine":[45],"Learning":[46],"(ML)":[47],"techniques.":[48],"Utilizing":[49],"specialized":[51],"dataset":[52],"145,513":[54],"Spanish":[55],"descriptions,":[57],"various":[58],"text":[59],"representation":[60],"paradigms":[61],"were":[62],"evaluated,":[63],"ranging":[64],"from":[65],"classical":[66],"frequency-based":[67],"models":[68],"(BoW,":[69],"TF-IDF)":[70],"state-of-the-art":[72],"Large":[73],"Models":[75],"(LLMs)":[76],"such":[77],"as":[78],"e5\\_large,":[79],"BioLORD,":[80],"Llama-3-8B.":[82],"Results":[83],"indicate":[84],"that":[85,120],"transformer-based":[86],"embeddings":[87],"consistently":[88],"outperform":[89],"traditional":[90],"methods":[91],"capturing":[93],"implicit":[94],"semantic":[95],"cues":[96],"nuanced":[98],"medical":[99],"terminology.":[100],"The":[101],"e5\\_large":[102],"model,":[103],"through":[104],"end-to-end":[105],"fine-tuning,":[106],"achieved":[107],"highest":[109],"performance":[110],"with":[111],"$F1_{micro}$":[113],"score":[114],"0.866.":[116],"research":[118],"demonstrates":[119],"adapting":[121],"LLMs":[122],"specific":[124],"nomenclature":[126],"is":[127],"essential":[128],"for":[129],"overcoming":[130],"challenges":[132],"``long-tail''":[134],"label":[135],"distributions":[136],"inherent":[139],"ambiguity":[140],"discourse.":[143]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-22T00:00:00"}
