{"id":"https://openalex.org/W7153318930","doi":"https://doi.org/10.48550/arxiv.2604.08448","title":"AfriVoices-KE: A Multilingual Speech Dataset for Kenyan Languages","display_name":"AfriVoices-KE: A Multilingual Speech Dataset for Kenyan Languages","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7153318930","doi":"https://doi.org/10.48550/arxiv.2604.08448"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08448","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133336372","display_name":"Lilian Wanzare","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wanzare, Lilian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133378032","display_name":"Cynthia Amol","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amol, Cynthia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133315796","display_name":"zekiel Maina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maina, zekiel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133352814","display_name":"Nelson Odhiambo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Odhiambo, Nelson","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133386423","display_name":"Hope Kerubo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kerubo, Hope","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133317006","display_name":"Leila Misula","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Misula, Leila","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049185110","display_name":"Vivian Anyango Oloo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oloo, Vivian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133332742","display_name":"Rennish Mboya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mboya, Rennish","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119304714","display_name":"Edwin Onkoba","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Onkoba, Edwin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071646145","display_name":"Edward Ombui","orcid":"https://orcid.org/0000-0002-1787-0681"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ombui, Edward","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079185777","display_name":"Joseph Muguro","orcid":"https://orcid.org/0000-0003-3343-3614"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muguro, Joseph","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088432815","display_name":"Ciira wa Maina","orcid":"https://orcid.org/0000-0003-4203-3129"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maina, Ciira wa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123619156","display_name":"Andrew Kipkebut","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kipkebut, Andrew","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133317659","display_name":"Alfred Omondi Otom","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Otom, Alfred Omondi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133376154","display_name":"Ian Ndung'u Kang'ethe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang'ethe, Ian Ndung'u","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133328139","display_name":"Angela Wambui Kanyi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kanyi, Angela Wambui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5055411217","display_name":"Brian Omwenga","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Omwenga, Brian Gichana","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":17,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13194","display_name":"ICT in Developing Communities","score":0.26829999685287476,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13194","display_name":"ICT in Developing Communities","score":0.26829999685287476,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.25,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.03880000114440918,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kenya","display_name":"Kenya","score":0.45879998803138733},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.4226999878883362},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.4185999929904938},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.35530000925064087},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3434999883174896},{"id":"https://openalex.org/keywords/data-collection","display_name":"Data collection","score":0.32690000534057617},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.3224000036716461},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.31769999861717224}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6823999881744385},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.525600016117096},{"id":"https://openalex.org/C187651312","wikidata":"https://www.wikidata.org/wiki/Q114","display_name":"Kenya","level":2,"score":0.45879998803138733},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.4226999878883362},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.4185999929904938},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.396699994802475},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3452000021934509},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3434999883174896},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.3224000036716461},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.31769999861717224},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3127000033855438},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.30660000443458557},{"id":"https://openalex.org/C2779881370","wikidata":"https://www.wikidata.org/wiki/Q383340","display_name":"Speech community","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C2992249680","wikidata":"https://www.wikidata.org/wiki/Q315","display_name":"Linguistic diversity","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.2743000090122223},{"id":"https://openalex.org/C14919245","wikidata":"https://www.wikidata.org/wiki/Q1976109","display_name":"Language technology","level":4,"score":0.2671000063419342},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.26159998774528503},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25529998540878296},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"AfriVoices-KE":[0,171],"is":[1],"a":[2,61,69,173],"large-scale":[3],"multilingual":[4],"speech":[5,30,57,94,180],"dataset":[6,24],"comprising":[7],"approximately":[8],"3,000":[9],"hours":[10,27,33],"of":[11,28,34,53,190],"audio":[12],"across":[13,42],"five":[14],"Kenyan":[15,90],"languages:":[16],"Dholuo,":[17],"Kikuyu,":[18],"Kalenjin,":[19],"Maasai,":[20],"and":[21,31,45,80,99,107,134,155,167,182],"Somali.":[22],"The":[23],"includes":[25],"750":[26],"scripted":[29,72],"2,250":[32],"spontaneous":[35],"speech,":[36],"collected":[37],"from":[38,75],"4,777":[39],"native":[40],"speakers":[41],"diverse":[43,64],"regions":[44],"demographics.":[46],"This":[47],"work":[48],"addresses":[49],"the":[50,89,141,187],"critical":[51],"underrepresentation":[52],"African":[54],"languages":[55],"in":[56],"technology":[58],"by":[59],"providing":[60],"high-quality,":[62],"linguistically":[63],"resource.":[65],"Data":[66],"collection":[67],"followed":[68],"dual":[70],"methodology:":[71],"recordings":[73],"drew":[74],"compiled":[76],"text":[77],"corpora,":[78],"translations,":[79],"domain-specific":[81],"generated":[82],"sentences":[83],"spanning":[84],"eleven":[85],"domains":[86],"relevant":[87],"to":[88,102,116,132,146],"context,":[91],"while":[92,185],"unscripted":[93],"was":[95],"elicited":[96],"through":[97,162],"textual":[98],"image":[100],"prompts":[101],"capture":[103],"natural":[104],"linguistic":[105,192],"variation":[106],"dialectal":[108],"nuances.":[109],"A":[110],"customized":[111],"mobile":[112],"application":[113],"enabled":[114],"contributors":[115],"record":[117],"using":[118],"smartphones.":[119],"Quality":[120],"assurance":[121],"operated":[122],"at":[123],"multiple":[124],"layers,":[125],"encompassing":[126],"automated":[127],"signal-to-noise":[128],"ratio":[129],"validation":[130],"prior":[131],"recording":[133],"human":[135],"review":[136],"for":[137,176],"content":[138],"accuracy.":[139],"Though":[140],"project":[142],"encountered":[143],"challenges":[144],"common":[145],"low-resource":[147],"settings,":[148],"including":[149],"unreliable":[150],"infrastructure,":[151],"device":[152],"compatibility":[153],"issues,":[154],"community":[156],"trust":[157],"barriers,":[158],"these":[159],"were":[160],"mitigated":[161],"local":[163],"mobilizers,":[164],"stakeholder":[165],"partnerships,":[166],"adaptive":[168],"training":[169],"protocols.":[170],"provides":[172],"foundational":[174],"resource":[175],"developing":[177],"inclusive":[178],"automatic":[179],"recognition":[181],"text-to-speech":[183],"systems,":[184],"advancing":[186],"digital":[188],"preservation":[189],"Kenya's":[191],"heritage.":[193]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-11T00:00:00"}
