{"id":"https://openalex.org/W4415870376","doi":"https://doi.org/10.1145/3757232.3757340","title":"Capturing Linguistic Diversity in Data Annotation","display_name":"Capturing Linguistic Diversity in Data Annotation","publication_year":2025,"publication_date":"2025-11-04","ids":{"openalex":"https://openalex.org/W4415870376","doi":"https://doi.org/10.1145/3757232.3757340"},"language":null,"primary_location":{"id":"doi:10.1145/3757232.3757340","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757232.3757340","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fifth Biennial African Human-Computer Interaction Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120248413","display_name":"Wangui Kamande","orcid":"https://orcid.org/0009-0009-9153-0157"},"institutions":[{"id":"https://openalex.org/I153975220","display_name":"Kenyatta University","ror":"https://ror.org/05p2z3x69","country_code":"KE","type":"education","lineage":["https://openalex.org/I153975220"]}],"countries":["KE"],"is_corresponding":true,"raw_author_name":"Wangui Kamande","raw_affiliation_strings":["Sama, Nairobi, Kenya"],"raw_orcid":"https://orcid.org/0009-0009-9153-0157","affiliations":[{"raw_affiliation_string":"Sama, Nairobi, Kenya","institution_ids":["https://openalex.org/I153975220"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013212846","display_name":"Claudel Rheault","orcid":"https://orcid.org/0009-0009-4675-5808"},"institutions":[{"id":"https://openalex.org/I4210162901","display_name":"Strateji Ara\u015fd\u0131rmalar M\u0259rk\u0259zi","ror":"https://ror.org/05j0jp984","country_code":"AZ","type":"nonprofit","lineage":["https://openalex.org/I4210162901"]}],"countries":["AZ"],"is_corresponding":false,"raw_author_name":"Claudel Rheault","raw_affiliation_strings":["Sama, Montreal, Canada"],"raw_orcid":"https://orcid.org/0009-0009-4675-5808","affiliations":[{"raw_affiliation_string":"Sama, Montreal, Canada","institution_ids":["https://openalex.org/I4210162901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120248414","display_name":"Margret Gatwiri","orcid":"https://orcid.org/0000-0002-7737-0793"},"institutions":[{"id":"https://openalex.org/I153975220","display_name":"Kenyatta University","ror":"https://ror.org/05p2z3x69","country_code":"KE","type":"education","lineage":["https://openalex.org/I153975220"]}],"countries":["KE"],"is_corresponding":false,"raw_author_name":"Margret Gatwiri","raw_affiliation_strings":["Sama, Nairobi, Kenya"],"raw_orcid":"https://orcid.org/0000-0002-7737-0793","affiliations":[{"raw_affiliation_string":"Sama, Nairobi, Kenya","institution_ids":["https://openalex.org/I153975220"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085015056","display_name":"Nicolas Duch\u00eane","orcid":"https://orcid.org/0009-0001-1398-9175"},"institutions":[{"id":"https://openalex.org/I4210162901","display_name":"Strateji Ara\u015fd\u0131rmalar M\u0259rk\u0259zi","ror":"https://ror.org/05j0jp984","country_code":"AZ","type":"nonprofit","lineage":["https://openalex.org/I4210162901"]}],"countries":["AZ"],"is_corresponding":false,"raw_author_name":"Nicolas Duch\u00eane","raw_affiliation_strings":["Sama, Montreal, Canada"],"raw_orcid":"https://orcid.org/0009-0001-1398-9175","affiliations":[{"raw_affiliation_string":"Sama, Montreal, Canada","institution_ids":["https://openalex.org/I4210162901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120248415","display_name":"Bryan Gachambi","orcid":"https://orcid.org/0009-0000-6348-2885"},"institutions":[{"id":"https://openalex.org/I153975220","display_name":"Kenyatta University","ror":"https://ror.org/05p2z3x69","country_code":"KE","type":"education","lineage":["https://openalex.org/I153975220"]}],"countries":["KE"],"is_corresponding":false,"raw_author_name":"Bryan Gachambi","raw_affiliation_strings":["Sama, Nairobi, Kenya"],"raw_orcid":"https://orcid.org/0009-0000-6348-2885","affiliations":[{"raw_affiliation_string":"Sama, Nairobi, Kenya","institution_ids":["https://openalex.org/I153975220"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040853478","display_name":"Philippe Jauffret","orcid":"https://orcid.org/0009-0000-3419-2950"},"institutions":[{"id":"https://openalex.org/I4210162901","display_name":"Strateji Ara\u015fd\u0131rmalar M\u0259rk\u0259zi","ror":"https://ror.org/05j0jp984","country_code":"AZ","type":"nonprofit","lineage":["https://openalex.org/I4210162901"]}],"countries":["AZ"],"is_corresponding":false,"raw_author_name":"Pascal Jauffret","raw_affiliation_strings":["Sama, Montreal, Canada"],"raw_orcid":"https://orcid.org/0009-0000-3419-2950","affiliations":[{"raw_affiliation_string":"Sama, Montreal, Canada","institution_ids":["https://openalex.org/I4210162901"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5120248413"],"corresponding_institution_ids":["https://openalex.org/I153975220"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.44801973,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"454","last_page":"458"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13194","display_name":"ICT in Developing Communities","score":0.8439000248908997,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13194","display_name":"ICT in Developing Communities","score":0.8439000248908997,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11024","display_name":"Information Systems Theories and Implementation","score":0.016899999231100082,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.009600000455975533,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/swahili","display_name":"Swahili","score":0.9376000165939331},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5981000065803528},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.49070000648498535},{"id":"https://openalex.org/keywords/linguistic-diversity","display_name":"Linguistic diversity","score":0.4000999927520752},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.3614000082015991},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.3517000079154968}],"concepts":[{"id":"https://openalex.org/C2779913364","wikidata":"https://www.wikidata.org/wiki/Q7838","display_name":"Swahili","level":2,"score":0.9376000165939331},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5981000065803528},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5651000142097473},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49070000648498535},{"id":"https://openalex.org/C2992249680","wikidata":"https://www.wikidata.org/wiki/Q315","display_name":"Linguistic diversity","level":2,"score":0.4000999927520752},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.39590001106262207},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.3614000082015991},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3517000079154968},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.31790000200271606},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.29089999198913574},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28519999980926514},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2671999931335449},{"id":"https://openalex.org/C108494575","wikidata":"https://www.wikidata.org/wiki/Q207930","display_name":"Languages of Africa","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757232.3757340","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757232.3757340","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fifth Biennial African Human-Computer Interaction Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2089681120","https://openalex.org/W2607311634","https://openalex.org/W2947142783","https://openalex.org/W3093541323","https://openalex.org/W3158839228","https://openalex.org/W4224981752","https://openalex.org/W4287854589","https://openalex.org/W4309619003","https://openalex.org/W4392929788","https://openalex.org/W4396831993","https://openalex.org/W4402343112","https://openalex.org/W4403334020","https://openalex.org/W4404690131","https://openalex.org/W4404783175","https://openalex.org/W4404783774","https://openalex.org/W4409403733","https://openalex.org/W4415796340"],"related_works":[],"abstract_inverted_index":{"Africa":[0],"is":[1,11,46],"the":[2,6,41,60,63,66,88,114,150],"fastest-growing":[3],"continent,":[4],"but":[5],"number":[7],"of":[8,62,106,153],"Afro-centric":[9],"datasets":[10],"still":[12],"limited.":[13],"African":[14],"languages":[15,37],"like":[16],"Swahili":[17,117],"are":[18,28],"getting":[19],"more":[20],"traction":[21],"in":[22,99,116,121],"foundation":[23],"models,":[24],"and":[25,33,58,90,123,131],"new":[26],"initiatives":[27],"focused":[29],"on":[30],"collecting":[31],"written":[32],"audio":[34],"data":[35,43,56,82,128,144],"for":[36,103,126,133],"currently":[38],"underrepresented.":[39],"Yet,":[40],"way":[42],"enrichment":[44,83,129,145],"works":[45],"often":[47],"dictated":[48],"by":[49],"how":[50],"technology":[51],"companies":[52],"have":[53],"been":[54],"doing":[55,65,77,113],"labeling,":[57],"ignores":[59],"perspectives":[61,111],"people":[64],"tasks.":[67],"In":[68],"this":[69,104],"case":[70],"study,":[71],"we":[72,138],"share":[73],"our":[74],"learnings":[75],"from":[76,112],"an":[78],"end-to-end":[79],"project":[80],"with":[81],"workers,":[84],"where":[85],"they":[86,96],"built":[87],"prompt":[89],"then":[91],"performed":[92],"tasks,":[93],"after":[94],"which":[95],"could":[97],"participate":[98],"imagining":[100],"future":[101,127],"tools":[102,130],"type":[105],"work.":[107],"We":[108],"contrast":[109],"user":[110],"work":[115],"as":[118],"compared":[119],"to":[120,142],"English,":[122],"present":[124],"ideas":[125],"process":[132],"capturing":[134],"language":[135],"nuances.":[136],"Finally,":[137],"suggest":[139],"different":[140],"approaches":[141],"building":[143],"paradigms":[146],"that":[147],"would":[148],"encourage":[149],"communal":[151],"realities":[152],"Swahili.":[154]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-11-04T00:00:00"}
