{"id":"https://openalex.org/W4366459394","doi":"https://doi.org/10.1145/3555776.3578577","title":"A Biomedical Entity Extraction Pipeline for Oncology Health Records in Portuguese","display_name":"A Biomedical Entity Extraction Pipeline for Oncology Health Records in Portuguese","publication_year":2023,"publication_date":"2023-03-27","ids":{"openalex":"https://openalex.org/W4366459394","doi":"https://doi.org/10.1145/3555776.3578577"},"language":"en","primary_location":{"id":"doi:10.1145/3555776.3578577","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3555776.3578577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2304.08999","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080716895","display_name":"Hugo Sousa","orcid":"https://orcid.org/0000-0003-3226-9189"},"institutions":[{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"Hugo Sousa","raw_affiliation_strings":["FCUP, Porto, Portugal","INESC TEC, Porto, Portugal","INESC TEC, Porto, Portugal FCUP, Porto, Portugal"],"affiliations":[{"raw_affiliation_string":"FCUP, Porto, Portugal","institution_ids":[]},{"raw_affiliation_string":"INESC TEC, Porto, Portugal","institution_ids":["https://openalex.org/I4210166615"]},{"raw_affiliation_string":"INESC TEC, Porto, Portugal FCUP, Porto, Portugal","institution_ids":["https://openalex.org/I4210166615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029214477","display_name":"Al\u00ed\u00adpio Jorge","orcid":"https://orcid.org/0000-0002-5475-1382"},"institutions":[{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Alipio Mario Jorge","raw_affiliation_strings":["FCUP, Porto, Portugal","INESC TEC, Porto, Portugal","INESC TEC, Porto, Portugal FCUP, Porto, Portugal"],"affiliations":[{"raw_affiliation_string":"FCUP, Porto, Portugal","institution_ids":[]},{"raw_affiliation_string":"INESC TEC, Porto, Portugal","institution_ids":["https://openalex.org/I4210166615"]},{"raw_affiliation_string":"INESC TEC, Porto, Portugal FCUP, Porto, Portugal","institution_ids":["https://openalex.org/I4210166615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037049954","display_name":"Arian Pasquali","orcid":"https://orcid.org/0000-0002-3487-9397"},"institutions":[{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Arian Pasquali","raw_affiliation_strings":["INESC TEC, Porto, Portugal"],"affiliations":[{"raw_affiliation_string":"INESC TEC, Porto, Portugal","institution_ids":["https://openalex.org/I4210166615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101417240","display_name":"Catarina Santos","orcid":"https://orcid.org/0000-0002-9327-4486"},"institutions":[{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Catarina Santos","raw_affiliation_strings":["INESC TEC, Porto, Portugal"],"affiliations":[{"raw_affiliation_string":"INESC TEC, Porto, Portugal","institution_ids":["https://openalex.org/I4210166615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086790940","display_name":"M\u00e1rio Amorim\u2010Lopes","orcid":"https://orcid.org/0000-0001-9609-4723"},"institutions":[{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Mario Lopes","raw_affiliation_strings":["INESC TEC, Porto, Portugal"],"affiliations":[{"raw_affiliation_string":"INESC TEC, Porto, Portugal","institution_ids":["https://openalex.org/I4210166615"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080716895"],"corresponding_institution_ids":["https://openalex.org/I4210166615"],"apc_list":null,"apc_paid":null,"fwci":0.7613,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.72108536,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"950","last_page":"956"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/portuguese","display_name":"Portuguese","score":0.6940116882324219},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6733347177505493},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5127066373825073},{"id":"https://openalex.org/keywords/health-records","display_name":"Health records","score":0.49136295914649963},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.36278092861175537},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3425142168998718},{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.26436910033226013},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08281835913658142},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.06120160222053528}],"concepts":[{"id":"https://openalex.org/C35219183","wikidata":"https://www.wikidata.org/wiki/Q5146","display_name":"Portuguese","level":2,"score":0.6940116882324219},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6733347177505493},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5127066373825073},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.49136295914649963},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.36278092861175537},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3425142168998718},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.26436910033226013},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08281835913658142},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.06120160222053528},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3555776.3578577","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3555776.3578577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2304.08999","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2304.08999","pdf_url":"https://arxiv.org/pdf/2304.08999","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2304.08999","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2304.08999","pdf_url":"https://arxiv.org/pdf/2304.08999","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5099999904632568}],"awards":[{"id":"https://openalex.org/G6105681291","display_name":null,"funder_award_id":"LA/P/0063/2020","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G8436637332","display_name":null,"funder_award_id":"LA/P/","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4366459394.pdf"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W1532940542","https://openalex.org/W1623072288","https://openalex.org/W2004910511","https://openalex.org/W2064675550","https://openalex.org/W2094591616","https://openalex.org/W2147800946","https://openalex.org/W2147880316","https://openalex.org/W2159583324","https://openalex.org/W2402601207","https://openalex.org/W2725541287","https://openalex.org/W2734608416","https://openalex.org/W2751967052","https://openalex.org/W2768488789","https://openalex.org/W2805183640","https://openalex.org/W2805211535","https://openalex.org/W2890830728","https://openalex.org/W2911489562","https://openalex.org/W2943381814","https://openalex.org/W2949108126","https://openalex.org/W2949176808","https://openalex.org/W2962902328","https://openalex.org/W2963341956","https://openalex.org/W2963354094","https://openalex.org/W2970228048","https://openalex.org/W2976398475","https://openalex.org/W2982486166","https://openalex.org/W2995602767","https://openalex.org/W3009990251","https://openalex.org/W3042528607","https://openalex.org/W3046375318","https://openalex.org/W3096266342","https://openalex.org/W3100271366","https://openalex.org/W3101058639","https://openalex.org/W3102123817","https://openalex.org/W3159789914","https://openalex.org/W3172235839","https://openalex.org/W3190642692","https://openalex.org/W3197941046","https://openalex.org/W4200226212","https://openalex.org/W4281709424","https://openalex.org/W4286376980","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W3031052312","https://openalex.org/W4389568370","https://openalex.org/W3032375762","https://openalex.org/W1995515455","https://openalex.org/W2080531066","https://openalex.org/W3108674512","https://openalex.org/W1506200166","https://openalex.org/W1489783725","https://openalex.org/W2148612803"],"abstract_inverted_index":{"Textual":[0],"health":[1,17,104],"records":[2,105],"of":[3,24,53,59,128,140,169,186,197],"cancer":[4],"patients":[5],"are":[6],"usually":[7],"protracted":[8],"and":[9,100,189,200],"highly":[10],"unstructured,":[11],"making":[12],"it":[13],"very":[14],"time-consuming":[15],"for":[16,69,82,120,149,166],"professionals":[18],"to":[19,34,96],"get":[20],"a":[21,46,177],"complete":[22],"overview":[23],"the":[25,51,57,74,92,117,138,141,158,164,167,170,194],"patient's":[26],"therapeutic":[27],"course.":[28],"As":[29],"such":[30],"limitations":[31],"can":[32],"lead":[33],"suboptimal":[35],"and/or":[36],"inefficient":[37],"treatment":[38],"procedures,":[39,98,198],"healthcare":[40],"providers":[41],"would":[42],"greatly":[43],"benefit":[44],"from":[45,102],"system":[47],"that":[48],"effectively":[49],"summarizes":[50],"information":[52],"those":[54],"records.":[55],"With":[56],"advent":[58],"deep":[60],"neural":[61,178],"models,":[62,174],"this":[63,88],"objective":[64],"has":[65],"been":[66],"partially":[67],"attained":[68],"English":[70],"clinical":[71],"texts,":[72],"however,":[73],"research":[75],"community":[76],"still":[77],"lacks":[78],"an":[79],"effective":[80],"solution":[81],"languages":[83],"with":[84,116,180],"limited":[85],"resources.":[86],"In":[87],"paper,":[89],"we":[90,94,155,160],"present":[91,157],"approach":[93],"developed":[95],"extract":[97],"drugs,":[99,199],"diseases":[101],"oncology":[103],"written":[106],"in":[107,114,153,162,193],"European":[108],"Portuguese.":[109],"This":[110],"project":[111],"was":[112],"conducted":[113],"collaboration":[115],"Portuguese":[118],"Institute":[119],"Oncology":[121],"which,":[122],"besides":[123],"holding":[124],"over":[125],"10":[126],"years":[127],"duly":[129],"protected":[130],"medical":[131],"records,":[132],"also":[133,156],"provided":[134],"oncologist":[135],"expertise":[136],"throughout":[137],"development":[139,168],"project.":[142],"Since":[143],"there":[144],"is":[145],"no":[146],"annotated":[147],"corpus":[148,165],"biomedical":[150],"entity":[151,181],"extraction":[152,196],"Portuguese,":[154],"strategy":[159],"followed":[161],"annotating":[163],"models.":[171],"The":[172],"final":[173],"which":[175],"combined":[176],"architecture":[179],"linking,":[182],"achieved":[183],"F1":[184],"scores":[185],"88.6,":[187],"95.0,":[188],"55.8":[190],"per":[191],"cent":[192],"mention":[195],"diseases,":[201],"respectively.":[202]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2023-04-22T00:00:00"}
