{"id":"https://openalex.org/W7128600623","doi":"https://doi.org/10.3390/informatics13020031","title":"LinguoNER: A Language-Agnostic Framework for Named Entity Recognition in Low-Resource Languages with a Focus on Yambeta","display_name":"LinguoNER: A Language-Agnostic Framework for Named Entity Recognition in Low-Resource Languages with a Focus on Yambeta","publication_year":2026,"publication_date":"2026-02-11","ids":{"openalex":"https://openalex.org/W7128600623","doi":"https://doi.org/10.3390/informatics13020031"},"language":"en","primary_location":{"id":"doi:10.3390/informatics13020031","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics13020031","pdf_url":"https://www.mdpi.com/2227-9709/13/2/31/pdf?version=1770815896","source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2227-9709/13/2/31/pdf?version=1770815896","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002175542","display_name":"Philippe Tamla","orcid":"https://orcid.org/0000-0002-0786-4253"},"institutions":[{"id":"https://openalex.org/I120691247","display_name":"FernUniversit\u00e4t in Hagen","ror":"https://ror.org/04tkkr536","country_code":"DE","type":"education","lineage":["https://openalex.org/I120691247"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Philippe Tamla","raw_affiliation_strings":["Faculty of Mathematics and Computer Science, University of Hagen, 58097 Hagen, Germany"],"raw_orcid":"https://orcid.org/0000-0002-0786-4253","affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, University of Hagen, 58097 Hagen, Germany","institution_ids":["https://openalex.org/I120691247"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125595871","display_name":"Stephane Donna","orcid":null},"institutions":[{"id":"https://openalex.org/I1331070479","display_name":"Universit\u00e9 de Yaound\u00e9 I","ror":"https://ror.org/022zbs961","country_code":"CM","type":"education","lineage":["https://openalex.org/I1331070479"]}],"countries":["CM"],"is_corresponding":false,"raw_author_name":"Stephane Donna","raw_affiliation_strings":["Faculty of Information and Communication Technology, ICT University USA, Yaounde P.O. Box 526, Cameroon"],"raw_orcid":"https://orcid.org/0009-0002-1808-4321","affiliations":[{"raw_affiliation_string":"Faculty of Information and Communication Technology, ICT University USA, Yaounde P.O. Box 526, Cameroon","institution_ids":["https://openalex.org/I1331070479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125620698","display_name":"Tobias Bigala","orcid":null},"institutions":[{"id":"https://openalex.org/I1331070479","display_name":"Universit\u00e9 de Yaound\u00e9 I","ror":"https://ror.org/022zbs961","country_code":"CM","type":"education","lineage":["https://openalex.org/I1331070479"]}],"countries":["CM"],"is_corresponding":false,"raw_author_name":"Tobias Bigala","raw_affiliation_strings":["Faculty of Information and Communication Technology, ICT University USA, Yaounde P.O. Box 526, Cameroon"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information and Communication Technology, ICT University USA, Yaounde P.O. Box 526, Cameroon","institution_ids":["https://openalex.org/I1331070479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125606415","display_name":"Dilan Nde","orcid":null},"institutions":[{"id":"https://openalex.org/I1331070479","display_name":"Universit\u00e9 de Yaound\u00e9 I","ror":"https://ror.org/022zbs961","country_code":"CM","type":"education","lineage":["https://openalex.org/I1331070479"]}],"countries":["CM"],"is_corresponding":false,"raw_author_name":"Dilan Nde","raw_affiliation_strings":["Faculty of Information and Communication Technology, ICT University USA, Yaounde P.O. Box 526, Cameroon"],"raw_orcid":"https://orcid.org/0009-0000-9336-0707","affiliations":[{"raw_affiliation_string":"Faculty of Information and Communication Technology, ICT University USA, Yaounde P.O. Box 526, Cameroon","institution_ids":["https://openalex.org/I1331070479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019342236","display_name":"Maxime Yves Julien Manifi Abouh","orcid":"https://orcid.org/0000-0003-1579-5232"},"institutions":[{"id":"https://openalex.org/I1331070479","display_name":"Universit\u00e9 de Yaound\u00e9 I","ror":"https://ror.org/022zbs961","country_code":"CM","type":"education","lineage":["https://openalex.org/I1331070479"]}],"countries":["CM"],"is_corresponding":false,"raw_author_name":"Maxime Yves Julien Manifi Abouh","raw_affiliation_strings":["Higher Teacher Training College, University of Yaounde, Yaounde P.O. Box 47, Cameroon"],"raw_orcid":"https://orcid.org/0000-0003-1579-5232","affiliations":[{"raw_affiliation_string":"Higher Teacher Training College, University of Yaounde, Yaounde P.O. Box 47, Cameroon","institution_ids":["https://openalex.org/I1331070479"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5125595863","display_name":"Florian Freund","orcid":null},"institutions":[{"id":"https://openalex.org/I120691247","display_name":"FernUniversit\u00e4t in Hagen","ror":"https://ror.org/04tkkr536","country_code":"DE","type":"education","lineage":["https://openalex.org/I120691247"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Florian Freund","raw_affiliation_strings":["Faculty of Mathematics and Computer Science, University of Hagen, 58097 Hagen, Germany"],"raw_orcid":"https://orcid.org/0000-0002-7344-6869","affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, University of Hagen, 58097 Hagen, Germany","institution_ids":["https://openalex.org/I120691247"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5002175542"],"corresponding_institution_ids":["https://openalex.org/I120691247"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28894262,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":"2","first_page":"31","last_page":"31"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.54830002784729,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.54830002784729,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2678999900817871,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.062300000339746475,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.7989000082015991},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6897000074386597},{"id":"https://openalex.org/keywords/named-entity","display_name":"Named entity","score":0.5187000036239624},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4765999913215637},{"id":"https://openalex.org/keywords/entity-linking","display_name":"Entity linking","score":0.46209999918937683},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.42100000381469727},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.41260001063346863},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.39309999346733093},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.37630000710487366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8587999939918518},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.7989000082015991},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.711899995803833},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6897000074386597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6093000173568726},{"id":"https://openalex.org/C2777889803","wikidata":"https://www.wikidata.org/wiki/Q25047676","display_name":"Named entity","level":2,"score":0.5187000036239624},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4765999913215637},{"id":"https://openalex.org/C96711827","wikidata":"https://www.wikidata.org/wiki/Q17012245","display_name":"Entity linking","level":3,"score":0.46209999918937683},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.42100000381469727},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.41260001063346863},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.39309999346733093},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.3646000027656555},{"id":"https://openalex.org/C28076734","wikidata":"https://www.wikidata.org/wiki/Q63087","display_name":"Coreference","level":3,"score":0.3440000116825104},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33640000224113464},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.3292999863624573},{"id":"https://openalex.org/C540372491","wikidata":"https://www.wikidata.org/wiki/Q82622","display_name":"Punctuation","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C2780224610","wikidata":"https://www.wikidata.org/wiki/Q1530061","display_name":"Credibility","level":2,"score":0.3269999921321869},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3160000145435333},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.3133000135421753},{"id":"https://openalex.org/C161831844","wikidata":"https://www.wikidata.org/wiki/Q2554325","display_name":"Lemmatisation","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.26669999957084656},{"id":"https://openalex.org/C95318506","wikidata":"https://www.wikidata.org/wiki/Q6588467","display_name":"Textual entailment","level":3,"score":0.2603999972343445}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/informatics13020031","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics13020031","pdf_url":"https://www.mdpi.com/2227-9709/13/2/31/pdf?version=1770815896","source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:4fd1229a13904ab9b19f0b1b623fe98e","is_oa":true,"landing_page_url":"https://doaj.org/article/4fd1229a13904ab9b19f0b1b623fe98e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Informatics, Vol 13, Iss 2, p 31 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/informatics13020031","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics13020031","pdf_url":"https://www.mdpi.com/2227-9709/13/2/31/pdf?version=1770815896","source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6674562096595764}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7128600623.pdf","grobid_xml":"https://content.openalex.org/works/W7128600623.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W43120215","https://openalex.org/W1520377376","https://openalex.org/W1580467103","https://openalex.org/W2008495428","https://openalex.org/W2030369136","https://openalex.org/W2089006436","https://openalex.org/W2158342889","https://openalex.org/W2169818249","https://openalex.org/W2795141263","https://openalex.org/W2949303037","https://openalex.org/W2977034261","https://openalex.org/W3009096638","https://openalex.org/W3035390927","https://openalex.org/W3096508121","https://openalex.org/W3098998028","https://openalex.org/W3115058362","https://openalex.org/W3159597830","https://openalex.org/W3207937903","https://openalex.org/W3209897172","https://openalex.org/W4229030698","https://openalex.org/W4378470138","https://openalex.org/W4385572824","https://openalex.org/W4388219236"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"LinguoNER,":[3],"a":[4,22,27,59,87,91,114,154,167,174,194,233,237,258],"practical":[5],"and":[6,37,75,113,146,163,165,215,223,236,241,268,287,294],"extensible":[7],"framework":[8],"for":[9,170,261],"bootstrapping":[10,262],"Named":[11],"Entity":[12],"Recognition":[13],"(NER)":[14],"in":[15,30,48,78,265],"extremely":[16],"low-resource":[17],"languages,":[18],"demonstrated":[19],"on":[20,143,272],"Yambeta,":[21],"Bantu":[23],"language":[24],"spoken":[25],"by":[26,57],"minority":[28],"community":[29],"Cameroon.":[31],"Due":[32],"to":[33,232],"scarce":[34],"digital":[35],"resources":[36,264],"the":[38,98,109,126,288],"absence":[39],"of":[40,251,291],"annotated":[41,107],"corpora,":[42],"Yambeta":[43,102,156],"has":[44],"remained":[45],"largely":[46],"underrepresented":[47,266],"Natural":[49],"Language":[50],"Processing":[51],"(NLP).":[52],"LinguoNER":[53,178,256],"addresses":[54],"this":[55],"gap":[56],"providing":[58],"methodologically":[60],"transparent":[61],"end-to-end":[62],"workflow":[63],"that":[64,159,228],"integrates":[65],"corpus":[66,89,127],"acquisition,":[67],"gazetteer-driven":[68],"automatic":[69],"annotation,":[70,125],"tokenizer":[71,158],"training,":[72],"transformer":[73,169],"fine-tuning,":[74],"multi-level":[76],"evaluation":[77,202],"settings":[79],"where":[80],"large-scale":[81],"manual":[82],"annotation":[83],"is":[84,128,134],"infeasible.":[85],"Using":[86],"Bible-derived":[88],"as":[90,131,245],"linguistically":[92,213],"stable":[93],"starting":[94],"point,":[95],"we":[96],"release":[97],"first":[99],"publicly":[100],"available":[101],"NER":[103,263],"dataset":[104],"(\u224825,000":[105],"tokens)":[106],"with":[108],"CoNLL":[110],"BIO":[111],"scheme":[112],"restricted":[115,231],"entity":[116,219],"schema":[117],"(PER/LOC/ORG).":[118],"Because":[119],"labels":[120],"are":[121,212,230],"generated":[122],"via":[123],"dictionary-based":[124],"best":[129],"characterized":[130],"silver-standard;":[132],"credibility":[133],"strengthened":[135],"through":[136],"recorded":[137],"dictionaries,":[138],"transparency":[139],"logs,":[140],"expert-in-the-loop":[141],"validation":[142],"sampled":[144],"subsets,":[145],"complementary":[147],"qualitative":[148],"error":[149],"analysis.":[150],"We":[151,226],"additionally":[152],"train":[153],"dedicated":[155],"WordPiece":[157],"preserves":[160],"tone":[161],"markers":[162],"diacritics,":[164],"fine-tune":[166],"bert-base-cased":[168],"token":[171],"classification.":[172],"On":[173],"held-out":[175],"test":[176],"split,":[177],"achieves":[179],"strong":[180],"token-level":[181],"performance":[182],"(Precision":[183],"=":[184,187,190],"0.989,":[185],"Recall":[186],"0.981,":[188],"F1":[189],"0.985),":[191],"substantially":[192],"outperforming":[193],"dictionary-only":[195],"gazetteer":[196],"baseline":[197],"(\u0394F1":[198],"\u2248":[199],"0.36).":[200],"Per-entity-type":[201],"further":[203],"indicates":[204],"improvements":[205],"beyond":[206],"surface-form":[207],"matching,":[208],"while":[209],"remaining":[210],"errors":[211],"motivated":[214],"primarily":[216],"involve":[217],"multi-word":[218],"boundaries,":[220],"agglutinative":[221],"constructions,":[222],"tone-/diacritic-sensitive":[224],"tokenization.":[225],"emphasize":[227],"results":[229],"Bible":[234],"domain":[235],"limited":[238],"label":[239],"space,":[240],"should":[242],"be":[243],"interpreted":[244],"proof-of-concept":[246],"evidence":[247],"rather":[248],"than":[249],"claims":[250],"broad":[252],"out-of-domain":[253],"generalization.":[254],"Overall,":[255],"provides":[257],"reproducible":[259],"blueprint":[260],"languages":[267,282],"supports":[269],"future":[270],"work":[271],"broader":[273],"corpora":[274],"sources":[275],"(e.g.,":[276,283],"news,":[277],"OPUS,":[278],"JW300),":[279],"additional":[280],"African":[281],"Yoruba,":[284],"Igbo,":[285],"Bassa),":[286],"iterative":[289],"creation":[290],"expert-refined":[292],"datasets":[293],"gold-standard":[295],"subsets.":[296]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2026-02-11T00:00:00"}
