{"id":"https://openalex.org/W6910614608","doi":"https://doi.org/10.48550/arxiv.2503.14173","title":"NERCat: Fine-Tuning for Enhanced Named Entity Recognition in Catalan","display_name":"NERCat: Fine-Tuning for Enhanced Named Entity Recognition in Catalan","publication_year":2025,"publication_date":"2025-03-18","ids":{"openalex":"https://openalex.org/W6910614608","doi":"https://doi.org/10.48550/arxiv.2503.14173"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2503.14173","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.14173","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2503.14173","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ferreres, Guillem Cadevall","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ferreres, Guillem Cadevall","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sanz, Marc Serrano","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sanz, Marc Serrano","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"G\u00e1mez, Marc Bardeli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"G\u00e1mez, Marc Bardeli","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Basullas, Pol Gerdt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Basullas, Pol Gerdt","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ruiz, Francesc Tarres","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruiz, Francesc Tarres","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Ferrero, Raul Quijada","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ferrero, Raul Quijada","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9538999795913696,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9538999795913696,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.009200000204145908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.00570000009611249,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nucleofection","display_name":"Nucleofection","score":0.43939998745918274},{"id":"https://openalex.org/keywords/gestational-period","display_name":"Gestational period","score":0.3587999939918518},{"id":"https://openalex.org/keywords/tsg101","display_name":"TSG101","score":0.3147999942302704},{"id":"https://openalex.org/keywords/diafiltration","display_name":"Diafiltration","score":0.31299999356269836},{"id":"https://openalex.org/keywords/dysgeusia","display_name":"Dysgeusia","score":0.3003999888896942},{"id":"https://openalex.org/keywords/hyporeflexia","display_name":"Hyporeflexia","score":0.2962999939918518},{"id":"https://openalex.org/keywords/proteogenomics","display_name":"Proteogenomics","score":0.2921999990940094},{"id":"https://openalex.org/keywords/fusible-alloy","display_name":"Fusible alloy","score":0.28459998965263367}],"concepts":[{"id":"https://openalex.org/C144251240","wikidata":"https://www.wikidata.org/wiki/Q7068229","display_name":"Nucleofection","level":4,"score":0.43939998745918274},{"id":"https://openalex.org/C2992336715","wikidata":"https://www.wikidata.org/wiki/Q63431143","display_name":"Gestational period","level":4,"score":0.3587999939918518},{"id":"https://openalex.org/C2778283623","wikidata":"https://www.wikidata.org/wiki/Q18032200","display_name":"TSG101","level":5,"score":0.3147999942302704},{"id":"https://openalex.org/C18743360","wikidata":"https://www.wikidata.org/wiki/Q1208096","display_name":"Diafiltration","level":4,"score":0.31299999356269836},{"id":"https://openalex.org/C2777054765","wikidata":"https://www.wikidata.org/wiki/Q6402731","display_name":"Dysgeusia","level":3,"score":0.3003999888896942},{"id":"https://openalex.org/C2777158700","wikidata":"https://www.wikidata.org/wiki/Q1419356","display_name":"Hyporeflexia","level":3,"score":0.2962999939918518},{"id":"https://openalex.org/C145741570","wikidata":"https://www.wikidata.org/wiki/Q7251534","display_name":"Proteogenomics","level":5,"score":0.2921999990940094},{"id":"https://openalex.org/C133074676","wikidata":"https://www.wikidata.org/wiki/Q428729","display_name":"Fusible alloy","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C180938184","wikidata":"https://www.wikidata.org/wiki/Q2142270","display_name":"Liquation","level":3,"score":0.2842999994754791},{"id":"https://openalex.org/C2779627259","wikidata":"https://www.wikidata.org/wiki/Q779763","display_name":"Pretext","level":3,"score":0.28299999237060547},{"id":"https://openalex.org/C104545631","wikidata":"https://www.wikidata.org/wiki/Q464858","display_name":"Demotion","level":3,"score":0.28200000524520874},{"id":"https://openalex.org/C2781032047","wikidata":"https://www.wikidata.org/wiki/Q938793","display_name":"Articular cartilage damage","level":5,"score":0.265500009059906},{"id":"https://openalex.org/C2777742743","wikidata":"https://www.wikidata.org/wiki/Q19904005","display_name":"Durvalumab","level":5,"score":0.26489999890327454},{"id":"https://openalex.org/C135979968","wikidata":"https://www.wikidata.org/wiki/Q609809","display_name":"Protein isoform","level":5,"score":0.2630999982357025},{"id":"https://openalex.org/C2776781215","wikidata":"https://www.wikidata.org/wiki/Q83253","display_name":"Triacetin","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C2776356786","wikidata":"https://www.wikidata.org/wiki/Q1048573","display_name":"Tubulopathy","level":3,"score":0.258899986743927},{"id":"https://openalex.org/C2777935831","wikidata":"https://www.wikidata.org/wiki/Q3144949","display_name":"Hemopericardium","level":3,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2503.14173","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.14173","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2503.14173","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.14173","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4148593842983246}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Named":[0],"Entity":[1],"Recognition":[2],"(NER)":[3],"is":[4],"a":[5,45,63],"critical":[6],"component":[7],"of":[8,28,37,48,65,114],"Natural":[9],"Language":[10],"Processing":[11],"(NLP)":[12],"for":[13,21,58,98,124],"extracting":[14],"structured":[15],"information":[16],"from":[17],"unstructured":[18],"text.":[19,60],"However,":[20],"low-resource":[22,118],"languages":[23,119],"like":[24],"Catalan,":[25],"the":[26,35,49,75,112,122],"performance":[27,56],"NER":[29,55],"systems":[30],"often":[31],"suffers":[32],"due":[33],"to":[34,53,71],"lack":[36],"high-quality":[38,133],"annotated":[39,67],"datasets.":[40,134],"This":[41,109],"paper":[42],"introduces":[43],"NERCat,":[44],"fine-tuned":[46],"version":[47],"GLiNER[1]":[50],"model,":[51,76],"designed":[52],"improve":[54],"specifically":[57],"Catalan":[59,68,126],"We":[61],"used":[62],"dataset":[64],"manually":[66],"television":[69],"transcriptions":[70],"train":[72],"and":[73,84,95,107,120,132],"fine-tune":[74],"focusing":[77],"on":[78],"domains":[79],"such":[80,103],"as":[81,104],"politics,":[82],"sports,":[83],"culture.":[85],"The":[86],"evaluation":[87],"results":[88],"show":[89],"significant":[90],"improvements":[91],"in":[92,117],"precision,":[93],"recall,":[94],"F1-score,":[96],"particularly":[97],"underrepresented":[99],"named":[100],"entity":[101],"categories":[102],"Law,":[105],"Product,":[106],"Facility.":[108],"study":[110],"demonstrates":[111],"effectiveness":[113],"domain-specific":[115],"fine-tuning":[116],"highlights":[121],"potential":[123],"enhancing":[125],"NLP":[127],"applications":[128],"through":[129],"manual":[130],"annotation":[131]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
