{"id":"https://openalex.org/W7138720653","doi":"https://doi.org/10.48550/arxiv.2603.13793","title":"GhanaNLP Parallel Corpora: Comprehensive Multilingual Resources for Low-Resource Ghanaian Languages","display_name":"GhanaNLP Parallel Corpora: Comprehensive Multilingual Resources for Low-Resource Ghanaian Languages","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138720653","doi":"https://doi.org/10.48550/arxiv.2603.13793"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13793","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13793","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13793","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030372321","display_name":"Lawrence Adu Gyamfi","orcid":"https://orcid.org/0000-0002-4100-8717"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gyamfi, Lawrence Adu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057911516","display_name":"Paul Azunre","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Azunre, Paul","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066938627","display_name":"Stephen E. Moore","orcid":"https://orcid.org/0000-0002-5670-4176"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moore, Stephen Edward","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088813814","display_name":"Joel Budu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Budu, Joel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129669955","display_name":"Akwasi Asare","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Asare, Akwasi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129653713","display_name":"Mich-Seth Owusu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Owusu, Mich-Seth","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129685689","display_name":"Jonathan Ofori Asiamah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Asiamah, Jonathan Ofori","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5030372321"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13194","display_name":"ICT in Developing Communities","score":0.34049999713897705,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13194","display_name":"ICT in Developing Communities","score":0.34049999713897705,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.20280000567436218,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.03229999914765358,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.685699999332428},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5231999754905701},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5220999717712402},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5144000053405762},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.48980000615119934},{"id":"https://openalex.org/keywords/languages-of-africa","display_name":"Languages of Africa","score":0.3878999948501587},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.3833000063896179},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.382999986410141}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7849000096321106},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.685699999332428},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5231999754905701},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5220999717712402},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5144000053405762},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4909999966621399},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.48980000615119934},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4756999909877777},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40720000863075256},{"id":"https://openalex.org/C108494575","wikidata":"https://www.wikidata.org/wiki/Q207930","display_name":"Languages of Africa","level":2,"score":0.3878999948501587},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3833000063896179},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.382999986410141},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3617999851703644},{"id":"https://openalex.org/C14919245","wikidata":"https://www.wikidata.org/wiki/Q1976109","display_name":"Language technology","level":4,"score":0.3409999907016754},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.32440000772476196},{"id":"https://openalex.org/C2986862884","wikidata":"https://www.wikidata.org/wiki/Q7553","display_name":"Language translation","level":3,"score":0.3190999925136566},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.31630000472068787},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3043000102043152},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.29750001430511475},{"id":"https://openalex.org/C1813318","wikidata":"https://www.wikidata.org/wiki/Q2465887","display_name":"Language industry","level":4,"score":0.29120001196861267},{"id":"https://openalex.org/C47602998","wikidata":"https://www.wikidata.org/wiki/Q2588869","display_name":"Language barrier","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13793","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13793","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13793","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13793","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7966325879096985,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Low":[0],"resource":[1],"languages":[2],"present":[3],"unique":[4],"challenges":[5],"for":[6,37,158],"natural":[7],"language":[8,69,110,156],"processing":[9],"due":[10],"to":[11,88,97,145,148],"the":[12,26,38,115,136],"limited":[13],"availability":[14],"of":[15,61],"digitized":[16],"and":[17,31,43,70,77,82,91,101,109,123,154],"well":[18,126],"structured":[19],"linguistic":[20],"data.":[21],"To":[22],"address":[23],"this":[24,142],"gap,":[25],"GhanaNLP":[27],"initiative":[28],"has":[29],"developed":[30],"curated":[32],"41,513":[33],"parallel":[34],"sentence":[35,64],"pairs":[36,65],"Twi,":[39],"Fante,":[40],"Ewe,":[41],"Ga,":[42],"Kusaal":[44],"languages,":[45],"which":[46],"are":[47,95],"widely":[48],"spoken":[49],"across":[50],"Ghana":[51],"yet":[52],"remain":[53],"underrepresented":[54],"in":[55,130],"digital":[56],"spaces.":[57],"Each":[58],"dataset":[59,116],"consists":[60],"carefully":[62],"aligned":[63],"between":[66],"a":[67],"local":[68],"English.":[71],"The":[72],"data":[73],"were":[74],"collected,":[75],"translated,":[76],"annotated":[78],"by":[79,151],"human":[80],"professionals":[81],"enriched":[83],"with":[84],"standard":[85],"structural":[86],"metadata":[87],"ensure":[89],"consistency":[90],"usability.":[92],"These":[93],"corpora":[94],"designed":[96],"support":[98],"research,":[99],"educational,":[100],"commercial":[102],"applications,":[103],"including":[104],"machine":[105],"translation,":[106],"speech":[107],"technologies,":[108],"preservation.":[111],"This":[112],"paper":[113],"documents":[114],"creation":[117],"methodology,":[118],"structure,":[119],"intended":[120],"use":[121],"cases,":[122],"evaluation,":[124],"as":[125,127,135],"their":[128],"deployment":[129],"real":[131],"world":[132],"applications":[133],"such":[134],"Khaya":[137],"AI":[138,150],"translation":[139],"engine.":[140],"Overall,":[141],"work":[143],"contributes":[144],"broader":[146],"efforts":[147],"democratize":[149],"enabling":[152],"inclusive":[153],"accessible":[155],"technologies":[157],"African":[159],"languages.":[160]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
