{"id":"https://openalex.org/W4394713365","doi":"https://doi.org/10.1109/bigcomp60711.2024.00067","title":"Effects of Swahili Monolingual Tokenizer on Downstream Tasks","display_name":"Effects of Swahili Monolingual Tokenizer on Downstream Tasks","publication_year":2024,"publication_date":"2024-02-18","ids":{"openalex":"https://openalex.org/W4394713365","doi":"https://doi.org/10.1109/bigcomp60711.2024.00067"},"language":"en","primary_location":{"id":"doi:10.1109/bigcomp60711.2024.00067","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigcomp60711.2024.00067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data and Smart Computing (BigComp)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093911860","display_name":"Goodwill Erasmo Ndomba","orcid":"https://orcid.org/0009-0009-6842-3548"},"institutions":[{"id":"https://openalex.org/I163753206","display_name":"Chungbuk National University","ror":"https://ror.org/02wnxgj78","country_code":"KR","type":"education","lineage":["https://openalex.org/I163753206"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Goodwill Erasmo Ndomba","raw_affiliation_strings":["Chungbuk National University,Dept. of Computer Engineering,Cheongju,South Korea","Dept. of Computer Engineering, Chungbuk National University, Cheongju, South Korea"],"affiliations":[{"raw_affiliation_string":"Chungbuk National University,Dept. of Computer Engineering,Cheongju,South Korea","institution_ids":["https://openalex.org/I163753206"]},{"raw_affiliation_string":"Dept. of Computer Engineering, Chungbuk National University, Cheongju, South Korea","institution_ids":["https://openalex.org/I163753206"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005505577","display_name":"Young-Seob Jeong","orcid":"https://orcid.org/0000-0002-9441-2940"},"institutions":[{"id":"https://openalex.org/I163753206","display_name":"Chungbuk National University","ror":"https://ror.org/02wnxgj78","country_code":"KR","type":"education","lineage":["https://openalex.org/I163753206"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Young-Seob Jeong","raw_affiliation_strings":["Chungbuk National University,Dept. of Computer Engineering,Cheongju,South Korea","Dept. of Computer Engineering, Chungbuk National University, Cheongju, South Korea"],"affiliations":[{"raw_affiliation_string":"Chungbuk National University,Dept. of Computer Engineering,Cheongju,South Korea","institution_ids":["https://openalex.org/I163753206"]},{"raw_affiliation_string":"Dept. of Computer Engineering, Chungbuk National University, Cheongju, South Korea","institution_ids":["https://openalex.org/I163753206"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5093911860"],"corresponding_institution_ids":["https://openalex.org/I163753206"],"apc_list":null,"apc_paid":null,"fwci":2.863,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87231839,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"357","last_page":"358"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10021","display_name":"EFL/ESL Teaching and Learning","score":0.8521000146865845,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10021","display_name":"EFL/ESL Teaching and Learning","score":0.8521000146865845,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.7732999920845032,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/swahili","display_name":"Swahili","score":0.8871949911117554},{"id":"https://openalex.org/keywords/downstream","display_name":"Downstream (manufacturing)","score":0.7589723467826843},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7125760316848755},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4401329457759857},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2617698609828949},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.05903136730194092},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.0425553023815155}],"concepts":[{"id":"https://openalex.org/C2779913364","wikidata":"https://www.wikidata.org/wiki/Q7838","display_name":"Swahili","level":2,"score":0.8871949911117554},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.7589723467826843},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7125760316848755},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4401329457759857},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2617698609828949},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.05903136730194092},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0425553023815155},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigcomp60711.2024.00067","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigcomp60711.2024.00067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data and Smart Computing (BigComp)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/2","display_name":"Zero hunger","score":0.4000000059604645}],"awards":[{"id":"https://openalex.org/G4081998342","display_name":null,"funder_award_id":"NRF-2020R1I1A3053015","funder_id":"https://openalex.org/F4320311649","funder_display_name":"Ministry of Education"}],"funders":[{"id":"https://openalex.org/F4320311649","display_name":"Ministry of Education","ror":"https://ror.org/036nq5137"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2987270981","https://openalex.org/W3093721400","https://openalex.org/W3155913599","https://openalex.org/W4250024550","https://openalex.org/W4287854589","https://openalex.org/W4360951492","https://openalex.org/W4385245566","https://openalex.org/W6770709732","https://openalex.org/W6784511839","https://openalex.org/W6793842883"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2024870452","https://openalex.org/W4232455167","https://openalex.org/W2589694904","https://openalex.org/W2023610355","https://openalex.org/W1964726137","https://openalex.org/W2197846837","https://openalex.org/W2966567079","https://openalex.org/W3182526644"],"abstract_inverted_index":{"The":[0],"introduction":[1],"of":[2,10,26,40],"transformer":[3],"[7]":[4],"has":[5],"led":[6],"to":[7],"the":[8,23],"development":[9],"large":[11],"language":[12],"models":[13],"(LLMs)":[14],"like":[15],"Bidirectional":[16],"Encoder":[17],"Representations":[18],"from":[19],"Transformers":[20],"(BERT).":[21],"However,":[22],"downstream":[24],"performance":[25],"LLMs":[27],"is":[28],"often":[29],"poor":[30],"for":[31],"low":[32],"resourced":[33],"languages":[34],"(LRLs)":[35],"such":[36],"as":[37],"Swahili":[38],"because":[39],"their":[41],"small":[42],"dataset":[43],"size.":[44]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
