{"id":"https://openalex.org/W4403304337","doi":"https://doi.org/10.3390/computers13100258","title":"Enhancement of Named Entity Recognition in Low-Resource Languages with Data Augmentation and BERT Models: A Case Study on Urdu","display_name":"Enhancement of Named Entity Recognition in Low-Resource Languages with Data Augmentation and BERT Models: A Case Study on Urdu","publication_year":2024,"publication_date":"2024-10-10","ids":{"openalex":"https://openalex.org/W4403304337","doi":"https://doi.org/10.3390/computers13100258"},"language":"en","primary_location":{"id":"doi:10.3390/computers13100258","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers13100258","pdf_url":"https://www.mdpi.com/2073-431X/13/10/258/pdf?version=1728612270","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2073-431X/13/10/258/pdf?version=1728612270","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061232021","display_name":"Fida Ullah","orcid":null},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Fida Ullah","raw_affiliation_strings":["Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico"],"affiliations":[{"raw_affiliation_string":"Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049701126","display_name":"Alexander Gelbukh","orcid":"https://orcid.org/0000-0001-7845-9039"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":true,"raw_author_name":"Alexander Gelbukh","raw_affiliation_strings":["Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico"],"affiliations":[{"raw_affiliation_string":"Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080486080","display_name":"Muhammad Tayyab Zamir","orcid":null},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Muhammad Tayyab Zamir","raw_affiliation_strings":["Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico"],"affiliations":[{"raw_affiliation_string":"Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037099886","display_name":"Edgardo M. Felipe\u2010River\u00f3n","orcid":"https://orcid.org/0000-0002-9828-3568"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Edgardo Manuel Felipe River\u1f79n","raw_affiliation_strings":["Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico"],"affiliations":[{"raw_affiliation_string":"Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008287867","display_name":"Grigori Sidorov","orcid":"https://orcid.org/0000-0003-3901-3522"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Grigori Sidorov","raw_affiliation_strings":["Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico"],"affiliations":[{"raw_affiliation_string":"Instituto Polit\u00e9cnico Nacional (IPN), Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC), Av. Juan de Dios Batiz, s/n, Mexico City 07320, Mexico","institution_ids":["https://openalex.org/I59361560"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5049701126"],"corresponding_institution_ids":["https://openalex.org/I59361560"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":4.088,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.94682043,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"13","issue":"10","first_page":"258","last_page":"258"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/urdu","display_name":"Urdu","score":0.865683376789093},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.7791001796722412},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6198437213897705},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5715781450271606},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4720039367675781},{"id":"https://openalex.org/keywords/named-entity","display_name":"Named entity","score":0.4541090130805969},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4134165644645691},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4128512144088745},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10058900713920593},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.09034857153892517},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.058174580335617065}],"concepts":[{"id":"https://openalex.org/C2777350258","wikidata":"https://www.wikidata.org/wiki/Q1617","display_name":"Urdu","level":2,"score":0.865683376789093},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.7791001796722412},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6198437213897705},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5715781450271606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4720039367675781},{"id":"https://openalex.org/C2777889803","wikidata":"https://www.wikidata.org/wiki/Q25047676","display_name":"Named entity","level":2,"score":0.4541090130805969},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4134165644645691},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4128512144088745},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10058900713920593},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.09034857153892517},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.058174580335617065},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/computers13100258","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers13100258","pdf_url":"https://www.mdpi.com/2073-431X/13/10/258/pdf?version=1728612270","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:fda736f94a3c4ace954f9985dc00c5f7","is_oa":true,"landing_page_url":"https://doaj.org/article/fda736f94a3c4ace954f9985dc00c5f7","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computers, Vol 13, Iss 10, p 258 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/computers13100258","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers13100258","pdf_url":"https://www.mdpi.com/2073-431X/13/10/258/pdf?version=1728612270","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1158795700","display_name":null,"funder_award_id":"Mexico","funder_id":"https://openalex.org/F4320321739","funder_display_name":"Consejo Nacional de Ciencia y Tecnolog\u00eda"}],"funders":[{"id":"https://openalex.org/F4320321739","display_name":"Consejo Nacional de Ciencia y Tecnolog\u00eda","ror":"https://ror.org/059ex5q34"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403304337.pdf","grobid_xml":"https://content.openalex.org/works/W4403304337.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W61219374","https://openalex.org/W2000363133","https://openalex.org/W2020278455","https://openalex.org/W2054460891","https://openalex.org/W2060772142","https://openalex.org/W2156488976","https://openalex.org/W2250597603","https://openalex.org/W2251680502","https://openalex.org/W2409439155","https://openalex.org/W2785349534","https://openalex.org/W2808481912","https://openalex.org/W2889383784","https://openalex.org/W2949241676","https://openalex.org/W2952087486","https://openalex.org/W2961231993","https://openalex.org/W3007799408","https://openalex.org/W3025740135","https://openalex.org/W3025955883","https://openalex.org/W3042521488","https://openalex.org/W3102295886","https://openalex.org/W3115908473","https://openalex.org/W3118295810","https://openalex.org/W3159921092","https://openalex.org/W3180181113","https://openalex.org/W3203737917","https://openalex.org/W4200464847","https://openalex.org/W4206505934","https://openalex.org/W4225768509","https://openalex.org/W4237530236","https://openalex.org/W4283395836","https://openalex.org/W4285100132","https://openalex.org/W4285154531","https://openalex.org/W4312433216","https://openalex.org/W4386546962","https://openalex.org/W6605572207","https://openalex.org/W6684234067","https://openalex.org/W6752475066","https://openalex.org/W6794678121","https://openalex.org/W6798060926","https://openalex.org/W6839071858"],"related_works":["https://openalex.org/W4285154531","https://openalex.org/W2032007337","https://openalex.org/W2250347524","https://openalex.org/W2186562580","https://openalex.org/W2155874911","https://openalex.org/W135458787","https://openalex.org/W3017222382","https://openalex.org/W4390279576","https://openalex.org/W3005759282","https://openalex.org/W3128216712"],"abstract_inverted_index":{"Identifying":[0],"and":[1,72,100,128,150],"categorizing":[2],"proper":[3],"nouns":[4],"in":[5,39,173],"text,":[6],"known":[7],"as":[8],"named":[9,81],"entity":[10],"recognition":[11],"(NER),":[12],"is":[13],"crucial":[14],"for":[15,26,59,177],"various":[16],"natural":[17],"language":[18],"processing":[19],"tasks.":[20],"However,":[21],"developing":[22],"effective":[23],"NER":[24,175],"techniques":[25,172],"low-resource":[27,178],"languages":[28,179],"like":[29,180],"Urdu":[30],"poses":[31],"challenges":[32],"due":[33],"to":[34,62,84],"limited":[35],"training":[36],"data,":[37],"particularly":[38],"the":[40,78,103,109,118,123,145,151,167],"nastaliq":[41],"script.":[42],"To":[43],"address":[44],"this,":[45],"our":[46,133],"study":[47],"introduces":[48],"a":[49,138,155],"novel":[50],"data":[51,170],"augmentation":[52,171],"method,":[53],"\u201ccontextual":[54],"word":[55],"embeddings":[56],"augmentation\u201d":[57],"(CWEA),":[58],"Urdu,":[60],"aiming":[61],"enrich":[63],"existing":[64],"datasets.":[65,86],"The":[66],"extended":[67],"dataset,":[68,95],"comprising":[69],"160,132":[70],"tokens":[71],"114,912":[73],"labeled":[74],"entities,":[75],"significantly":[76],"enhances":[77],"coverage":[79],"of":[80,114,122,142,159,169],"entities":[82],"compared":[83],"previous":[85],"We":[87],"evaluated":[88],"several":[89],"transformer":[90],"models":[91],"on":[92,161],"this":[93],"augmented":[94,162],"including":[96],"BERT-multilingual,":[97],"RoBERTa-Urdu-small,":[98],"BERT-base-cased,":[99],"BERT-large-cased.":[101],"Notably,":[102],"BERT-multilingual":[104],"model":[105,136,147,153],"outperformed":[106],"others,":[107],"achieving":[108],"highest":[110],"macro":[111,119,156],"F1":[112,140,157],"score":[113,141,158],"0.982%.":[115],"This":[116],"surpassed":[117],"f1":[120],"scores":[121],"RoBERTa-Urdu-small":[124],"(0.884%),":[125],"BERT-large-cased":[126],"(0.916%),":[127],"BERT-base-cased":[129],"(0.908%)":[130],"models.":[131],"Additionally,":[132],"neural":[134],"network":[135],"achieved":[137,148,154],"micro":[139],"96%,":[143],"while":[144],"RNN":[146],"97%":[149],"BiLSTM":[152],"96%":[160],"data.":[163],"Our":[164],"findings":[165],"underscore":[166],"efficacy":[168],"enhancing":[174],"performance":[176],"Urdu.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
