{"id":"https://openalex.org/W2560082712","doi":"https://doi.org/10.1145/3015157.3015160","title":"Assessing the Impact of Vocabulary Similarity on Multilingual Information Retrieval for Bantu Languages","display_name":"Assessing the Impact of Vocabulary Similarity on Multilingual Information Retrieval for Bantu Languages","publication_year":2016,"publication_date":"2016-12-07","ids":{"openalex":"https://openalex.org/W2560082712","doi":"https://doi.org/10.1145/3015157.3015160","mag":"2560082712"},"language":"en","primary_location":{"id":"doi:10.1145/3015157.3015160","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3015157.3015160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th Annual Meeting of the Forum for Information Retrieval Evaluation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087304429","display_name":"Catherine Chavula","orcid":"https://orcid.org/0000-0002-6113-1691"},"institutions":[{"id":"https://openalex.org/I157614274","display_name":"University of Cape Town","ror":"https://ror.org/03p74gp79","country_code":"ZA","type":"education","lineage":["https://openalex.org/I157614274"]}],"countries":["ZA"],"is_corresponding":true,"raw_author_name":"Catherine Chavula","raw_affiliation_strings":["University of Cape Town, Department of Computer Science"],"affiliations":[{"raw_affiliation_string":"University of Cape Town, Department of Computer Science","institution_ids":["https://openalex.org/I157614274"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009415079","display_name":"Hussein Suleman","orcid":"https://orcid.org/0000-0002-4196-1444"},"institutions":[{"id":"https://openalex.org/I157614274","display_name":"University of Cape Town","ror":"https://ror.org/03p74gp79","country_code":"ZA","type":"education","lineage":["https://openalex.org/I157614274"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Hussein Suleman","raw_affiliation_strings":["University of Cape Town, Department of Computer Science"],"affiliations":[{"raw_affiliation_string":"University of Cape Town, Department of Computer Science","institution_ids":["https://openalex.org/I157614274"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5087304429"],"corresponding_institution_ids":["https://openalex.org/I157614274"],"apc_list":null,"apc_paid":null,"fwci":1.9904,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.90249286,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"16","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bantu-languages","display_name":"Bantu languages","score":0.951708972454071},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7428449392318726},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6495449542999268},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.64234858751297},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5971697568893433},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5541924238204956},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4260832369327545},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4237234890460968}],"concepts":[{"id":"https://openalex.org/C99878080","wikidata":"https://www.wikidata.org/wiki/Q33146","display_name":"Bantu languages","level":2,"score":0.951708972454071},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7428449392318726},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6495449542999268},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.64234858751297},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5971697568893433},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5541924238204956},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4260832369327545},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4237234890460968},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3015157.3015160","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3015157.3015160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th Annual Meeting of the Forum for Information Retrieval Evaluation","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8299999833106995}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W58016661","https://openalex.org/W72960384","https://openalex.org/W202663801","https://openalex.org/W872435579","https://openalex.org/W1480223262","https://openalex.org/W1483313504","https://openalex.org/W1507150160","https://openalex.org/W1518518886","https://openalex.org/W1526974435","https://openalex.org/W1532889113","https://openalex.org/W1573481780","https://openalex.org/W1583213108","https://openalex.org/W1660390307","https://openalex.org/W1987718503","https://openalex.org/W1997682167","https://openalex.org/W2037959956","https://openalex.org/W2053528567","https://openalex.org/W2055563597","https://openalex.org/W2064550430","https://openalex.org/W2069870183","https://openalex.org/W2085981754","https://openalex.org/W2129094221","https://openalex.org/W2152577431","https://openalex.org/W2289888486","https://openalex.org/W2295481403","https://openalex.org/W2476361705","https://openalex.org/W2581848328","https://openalex.org/W2597643885","https://openalex.org/W4233337856","https://openalex.org/W4238700636","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W3018705524","https://openalex.org/W2620573137","https://openalex.org/W1981660290","https://openalex.org/W2001620595","https://openalex.org/W1972655698","https://openalex.org/W1984171833","https://openalex.org/W3107474891","https://openalex.org/W2038246283","https://openalex.org/W2349125667","https://openalex.org/W4281690070"],"abstract_inverted_index":{"Despite":[0],"the":[1,13,28,41,77,90,101,127],"availability":[2],"of":[3,79,103,113],"massive":[4],"open":[5],"information":[6,61],"and":[7,97,118,120,124,136],"efforts":[8],"to":[9,56],"promote":[10],"multilingualism":[11],"on":[12,76,100],"Web,":[14],"content":[15],"in":[16,82,126,144],"Bantu":[17,115],"languages":[18,38,47,64],"remains":[19],"negligible.":[20],"Additionally,":[21],"Information":[22,84],"Retrieval":[23,85],"(IR)":[24],"systems,":[25],"such":[26,48],"as":[27,49],"Google":[29],"search":[30,80],"engine,":[31],"use":[32],"algorithms":[33],"that":[34,39,132],"work":[35],"well":[36],"with":[37,65],"have":[40],"most":[42],"content.":[43,68],"Similarities":[44],"across":[45],"related":[46,106,135],"vocabulary":[50,73],"overlap":[51],"can":[52],"potentially":[53],"be":[54],"exploited":[55],"provide":[57],"more":[58],"opportunities":[59],"for":[60,63,95,105],"access":[62],"limited":[66],"digital":[67],"This":[69],"study":[70,91],"investigates":[71],"how":[72],"similarity":[74],"impacts":[75],"quality":[78,102],"results":[81,130],"Multilingual":[83],"(MLIR)":[86],"environments.":[87],"More":[88],"specifically,":[89],"evaluates":[92],"indexing":[93,141],"strategies":[94,142],"MLIR":[96,140],"their":[98],"effect":[99],"retrieval":[104,148],"languages.":[107],"A":[108],"multilingual":[109],"test":[110],"collection":[111],"consisting":[112],"two":[114],"languages,":[116],"Citumbuka":[117],"Chichewa,":[119],"English":[121],"was":[122],"developed":[123],"used":[125],"evaluation.":[128],"The":[129],"show":[131],"when":[133],"comparing":[134],"unrelated":[137],"language":[138],"pairs,":[139],"result":[143],"comparable":[145],"or":[146],"worse":[147],"performance.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
