{"id":"https://openalex.org/W4415198590","doi":"https://doi.org/10.48550/arxiv.2505.09794","title":"Automated Detection of Clinical Entities in Lung and Breast Cancer Reports Using NLP Techniques","display_name":"Automated Detection of Clinical Entities in Lung and Breast Cancer Reports Using NLP Techniques","publication_year":2025,"publication_date":"2025-05-14","ids":{"openalex":"https://openalex.org/W4415198590","doi":"https://doi.org/10.48550/arxiv.2505.09794"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2505.09794","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.09794","pdf_url":"https://arxiv.org/pdf/2505.09794","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.09794","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120004985","display_name":"J. Moreno-Casanova","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Moreno-Casanova, J.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120004986","display_name":"J. M. Au\u00f1\u00f3n","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Au\u00f1\u00f3n, J. M.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029536669","display_name":"Aleix Mart\u00ednez\u2010P\u00e9rez","orcid":"https://orcid.org/0000-0003-0601-932X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M\u00e1rtinez-P\u00e9rez, A.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054368953","display_name":"Mar\u00eda\u2010Eugenia P\u00e9rez\u2010Mart\u00ednez","orcid":"https://orcid.org/0000-0001-7863-0043"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"P\u00e9rez-Mart\u00ednez, M. E.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120004987","display_name":"M. E. Gas-L\u00f3pez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gas-L\u00f3pez, M. E.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5120004985"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.948199987411499,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9074000120162964,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/snomed-ct","display_name":"SNOMED CT","score":0.8335999846458435},{"id":"https://openalex.org/keywords/breast-cancer","display_name":"Breast cancer","score":0.5090000033378601},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.48660001158714294},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.47360000014305115},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.4352000057697296},{"id":"https://openalex.org/keywords/health-records","display_name":"Health records","score":0.4341000020503998},{"id":"https://openalex.org/keywords/lung-cancer","display_name":"Lung cancer","score":0.39640000462532043}],"concepts":[{"id":"https://openalex.org/C206497026","wikidata":"https://www.wikidata.org/wiki/Q1753883","display_name":"SNOMED CT","level":3,"score":0.8335999846458435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6531999707221985},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5997999906539917},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5550000071525574},{"id":"https://openalex.org/C530470458","wikidata":"https://www.wikidata.org/wiki/Q128581","display_name":"Breast cancer","level":3,"score":0.5090000033378601},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.48660001158714294},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.47360000014305115},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.4352000057697296},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.4341000020503998},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.412200003862381},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4120999872684479},{"id":"https://openalex.org/C2776256026","wikidata":"https://www.wikidata.org/wiki/Q47912","display_name":"Lung cancer","level":2,"score":0.39640000462532043},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.37860000133514404},{"id":"https://openalex.org/C3020144179","wikidata":"https://www.wikidata.org/wiki/Q10871684","display_name":"Electronic health record","level":3,"score":0.3546999990940094},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.3043999969959259},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.27799999713897705},{"id":"https://openalex.org/C71472368","wikidata":"https://www.wikidata.org/wiki/Q676880","display_name":"Text mining","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C2779974597","wikidata":"https://www.wikidata.org/wiki/Q28448986","display_name":"Clinical Practice","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2505.09794","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.09794","pdf_url":"https://arxiv.org/pdf/2505.09794","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2505.09794","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.09794","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.09794","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.09794","pdf_url":"https://arxiv.org/pdf/2505.09794","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Research":[0,188],"projects,":[1],"including":[2,146],"those":[3],"focused":[4],"on":[5,8,61,77],"cancer,":[6],"rely":[7],"the":[9,26,46,72,99,159,215,223,238],"manual":[10],"extraction":[11,47],"of":[12,28,48,89,103,161,244],"information":[13,176],"from":[14,51,177,186],"clinical":[15,119,175,210,245],"reports.":[16],"This":[17],"process":[18],"is":[19],"time-consuming":[20],"and":[21,63,71,82,101,121,130,136,151,172,201,263],"prone":[22],"to":[23,67,169,180],"errors,":[24],"limiting":[25],"efficiency":[27,102],"data-driven":[29],"approaches":[30],"in":[31,86,118,232,247,258],"healthcare.":[32],"To":[33,97,218],"address":[34],"these":[35,181,248],"challenges,":[36],"Natural":[37],"Language":[38],"Processing":[39],"(NLP)":[40],"offers":[41],"an":[42],"alternative":[43],"for":[44,93],"automating":[45],"relevant":[49,116],"data":[50,84,104],"electronic":[52],"health":[53],"records":[54],"(EHRs).":[55],"In":[56,154],"this":[57,155],"study,":[58],"we":[59,106,157,221],"focus":[60],"lung":[62,203],"breast":[64,199],"cancer":[65,90,200,204,249],"due":[66],"their":[68],"high":[69],"incidence":[70],"significant":[73],"impact":[74],"they":[75],"have":[76],"public":[78],"health.":[79],"Early":[80],"detection":[81],"effective":[83],"management":[85],"both":[87],"types":[88],"are":[91],"crucial":[92],"improving":[94],"patient":[95],"outcomes.":[96],"enhance":[98],"accuracy":[100],"extraction,":[105],"utilized":[107],"GMV's":[108],"NLP":[109,162],"tool":[110],"uQuery,":[111],"which":[112],"excels":[113],"at":[114],"identifying":[115,259],"entities":[117,138,211,246,260,271],"texts":[120],"converting":[122],"them":[123,142],"into":[124],"standardized":[125],"formats":[126],"such":[127],"as":[128],"SNOMED":[129],"OMOP.":[131],"uQuery":[132],"not":[133],"only":[134],"detects":[135],"classifies":[137],"but":[139],"also":[140],"associates":[141],"with":[143,208,268],"contextual":[144],"information,":[145],"negated":[147],"entities,":[148],"temporal":[149],"aspects,":[150],"patient-related":[152],"details.":[153],"work,":[156],"explore":[158],"use":[160],"techniques,":[163],"specifically":[164],"Named":[165],"Entity":[166],"Recognition":[167],"(NER),":[168],"automatically":[170],"identify":[171],"extract":[173],"key":[174],"EHRs":[178],"related":[179],"two":[182],"cancers.":[183],"A":[184],"dataset":[185],"Health":[187],"Institute":[189],"Hospital":[190],"La":[191,194],"Fe":[192],"(IIS":[193],"Fe),":[195],"comprising":[196],"200":[197],"annotated":[198],"400":[202],"reports,":[205],"was":[206,235],"used,":[207],"eight":[209],"manually":[212],"labeled":[213],"using":[214,237],"Doccano":[216],"platform.":[217],"perform":[219],"NER,":[220],"fine-tuned":[222],"bsc-bio-ehr-en3":[224],"model,":[225],"a":[226],"RoBERTa-based":[227],"biomedical":[228],"linguistic":[229],"model":[230],"pre-trained":[231],"Spanish.":[233],"Fine-tuning":[234],"performed":[236],"Transformers":[239],"architecture,":[240],"enabling":[241],"accurate":[242],"recognition":[243],"types.":[250],"Our":[251],"results":[252],"demonstrate":[253],"strong":[254],"overall":[255],"performance,":[256],"particularly":[257],"like":[261,272],"MET":[262],"PAT,":[264],"although":[265],"challenges":[266],"remain":[267],"less":[269],"frequent":[270],"EVOL.":[273]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-15T00:00:00"}
