{"id":"https://openalex.org/W4395448128","doi":"https://doi.org/10.1109/ictas59620.2024.10507140","title":"Datasets Collection Framework for Low-Resourced Languages in South Africa","display_name":"Datasets Collection Framework for Low-Resourced Languages in South Africa","publication_year":2024,"publication_date":"2024-03-07","ids":{"openalex":"https://openalex.org/W4395448128","doi":"https://doi.org/10.1109/ictas59620.2024.10507140"},"language":"en","primary_location":{"id":"doi:10.1109/ictas59620.2024.10507140","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictas59620.2024.10507140","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Conference on Information Communications Technology and Society (ICTAS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5095887718","display_name":"Nontokozo M. Magangane","orcid":null},"institutions":[{"id":"https://openalex.org/I12553626","display_name":"University of Zululand","ror":"https://ror.org/03v8ter60","country_code":"ZA","type":"education","lineage":["https://openalex.org/I12553626"]}],"countries":["ZA"],"is_corresponding":true,"raw_author_name":"Nontokozo M. Magangane","raw_affiliation_strings":["University of Zululand,Faculty of Science,Department of Computer Science,Empangeni,South Africa","Department of Computer Science, Faculty of Science, University of Zululand, Empangeni, South Africa"],"affiliations":[{"raw_affiliation_string":"University of Zululand,Faculty of Science,Department of Computer Science,Empangeni,South Africa","institution_ids":["https://openalex.org/I12553626"]},{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, University of Zululand, Empangeni, South Africa","institution_ids":["https://openalex.org/I12553626"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012605753","display_name":"Skhumbuzo G. Zwane","orcid":"https://orcid.org/0009-0008-0760-4444"},"institutions":[{"id":"https://openalex.org/I12553626","display_name":"University of Zululand","ror":"https://ror.org/03v8ter60","country_code":"ZA","type":"education","lineage":["https://openalex.org/I12553626"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Skhumbuzo G. Zwane","raw_affiliation_strings":["University of Zululand,Faculty of Science,Department of Computer Science,Empangeni,South Africa","Department of Computer Science, Faculty of Science, University of Zululand, Empangeni, South Africa"],"affiliations":[{"raw_affiliation_string":"University of Zululand,Faculty of Science,Department of Computer Science,Empangeni,South Africa","institution_ids":["https://openalex.org/I12553626"]},{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, University of Zululand, Empangeni, South Africa","institution_ids":["https://openalex.org/I12553626"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039049891","display_name":"Matthew O. Adigun","orcid":"https://orcid.org/0000-0001-6256-5865"},"institutions":[{"id":"https://openalex.org/I12553626","display_name":"University of Zululand","ror":"https://ror.org/03v8ter60","country_code":"ZA","type":"education","lineage":["https://openalex.org/I12553626"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Matthew O. Adigun","raw_affiliation_strings":["University of Zululand,Faculty of Science,Department of Computer Science,Empangeni,South Africa","Department of Computer Science, Faculty of Science, University of Zululand, Empangeni, South Africa"],"affiliations":[{"raw_affiliation_string":"University of Zululand,Faculty of Science,Department of Computer Science,Empangeni,South Africa","institution_ids":["https://openalex.org/I12553626"]},{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, University of Zululand, Empangeni, South Africa","institution_ids":["https://openalex.org/I12553626"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5095887718"],"corresponding_institution_ids":["https://openalex.org/I12553626"],"apc_list":null,"apc_paid":null,"fwci":0.3637,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62514226,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"69","last_page":"74"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10265","display_name":"Multilingual Education and Policy","score":0.9301999807357788,"subfield":{"id":"https://openalex.org/subfields/3310","display_name":"Linguistics and Language"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.9205999970436096,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6680168509483337},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32457131147384644}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6680168509483337},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32457131147384644}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ictas59620.2024.10507140","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictas59620.2024.10507140","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Conference on Information Communications Technology and Society (ICTAS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W28268536","https://openalex.org/W2055522016","https://openalex.org/W2056119007","https://openalex.org/W2063484628","https://openalex.org/W2124351082","https://openalex.org/W2149684865","https://openalex.org/W2155632266","https://openalex.org/W2946609082","https://openalex.org/W2962937786","https://openalex.org/W2987201163","https://openalex.org/W3010393690","https://openalex.org/W3011458709","https://openalex.org/W3081125651","https://openalex.org/W3117606711","https://openalex.org/W3132191748","https://openalex.org/W3134751001","https://openalex.org/W3173031252","https://openalex.org/W3181865509","https://openalex.org/W4244705308","https://openalex.org/W4256049924","https://openalex.org/W4287866999","https://openalex.org/W6601139856"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0,78,128],"linguistic":[1,69,160,179],"diversity":[2,180],"in":[3,75,88,104,125,166,177,186],"South":[4,32,76],"Africa":[5,33],"presents":[6],"a":[7,57,82,89,158],"unique":[8],"challenge":[9],"for":[10,71,107,122,134],"Natural":[11],"Language":[12,83],"Processing":[13],"(NLP)":[14],"applications,":[15],"as":[16],"many":[17],"of":[18,26,31,50,68,81,152],"the":[19,27,48,51,66,72,105,135,150,167,174],"country's":[20],"languages":[21,30,74],"are":[22],"considered":[23],"low-resourced.":[24],"Eight":[25],"eleven":[28],"official":[29],"currently":[34],"lack":[35],"sufficient":[36],"documentation":[37],"and":[38,44,101,110,162,181],"resources,":[39],"apart":[40],"from":[41],"English,":[42],"Afrikaans,":[43],"isiZulu,":[45],"which":[46],"house":[47],"majority":[49],"reported":[52],"datasets.":[53],"This":[54,170],"paper":[55],"introduces":[56],"comprehensive":[58],"data":[59],"collection":[60],"framework":[61,79],"specifically":[62],"tailored":[63],"to":[64,95,157,173],"address":[65],"scarcity":[67],"resources":[70,144],"underrepresented":[73],"Africa.":[77],"consists":[80],"Identification":[84],"(LI)":[85],"model":[86,112],"embedded":[87],"database":[90,106],"portal":[91],"that":[92,141,155],"is":[93,139],"used":[94],"gather":[96],"text":[97],"data,":[98],"label":[99],"it,":[100],"store":[102],"it":[103],"future":[108],"usage":[109],"LI":[111,126],"retraining.":[113],"In":[114],"addition,":[115],"different":[116],"machine":[117],"learning":[118],"classifiers":[119],"were":[120],"compared":[121],"their":[123],"effectiveness":[124],"tasks.":[127],"best-performing":[129],"classifier":[130],"was":[131],"then":[132],"utilized":[133],"proof-of-concept":[136],"implementation.":[137],"It":[138],"anticipated":[140],"collecting":[142],"such":[143],"will":[145],"foster":[146],"greater":[147],"inclusivity,":[148],"enabling":[149],"development":[151],"language":[153],"technologies":[154],"cater":[156],"broader":[159,175],"landscape":[161],"promote":[163],"cultural":[164],"preservation":[165],"digital":[168],"era.":[169],"work":[171],"contributes":[172],"efforts":[176],"preserving":[178],"promoting":[182],"inclusive":[183],"technological":[184],"solutions":[185],"multilingual":[187],"societies.":[188]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
