{"id":"https://openalex.org/W4400526322","doi":"https://doi.org/10.1145/3626772.3657952","title":"On Backbones and Training Regimes for Dense Retrieval in African Languages","display_name":"On Backbones and Training Regimes for Dense Retrieval in African Languages","publication_year":2024,"publication_date":"2024-07-10","ids":{"openalex":"https://openalex.org/W4400526322","doi":"https://doi.org/10.1145/3626772.3657952"},"language":"en","primary_location":{"id":"doi:10.1145/3626772.3657952","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3626772.3657952","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060546041","display_name":"Akintunde Oladipo","orcid":"https://orcid.org/0009-0000-2630-8167"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Akintunde Oladipo","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076302977","display_name":"Mofetoluwa Adeyemi","orcid":"https://orcid.org/0009-0003-2859-7136"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mofetoluwa Adeyemi","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082997975","display_name":"Jimmy Lin","orcid":"https://orcid.org/0000-0002-0661-7189"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jimmy Lin","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5060546041"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08409317,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2564","last_page":"2568"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9729999899864197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9729999899864197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13912","display_name":"Language, Linguistics, Cultural Analysis","score":0.9082000255584717,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6981526017189026},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6711494326591492},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41614145040512085},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3908420205116272},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.10842731595039368},{"id":"https://openalex.org/keywords/meteorology","display_name":"Meteorology","score":0.055789798498153687}],"concepts":[{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6981526017189026},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6711494326591492},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41614145040512085},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3908420205116272},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.10842731595039368},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.055789798498153687}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3626772.3657952","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3626772.3657952","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2607303097","https://openalex.org/W2912814679","https://openalex.org/W2930957955","https://openalex.org/W3021397474","https://openalex.org/W3093517588","https://openalex.org/W4205293907","https://openalex.org/W4385572824","https://openalex.org/W4385780698","https://openalex.org/W4386488973","https://openalex.org/W4389518372","https://openalex.org/W4389518627","https://openalex.org/W4389523795","https://openalex.org/W4392669860"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W230091440","https://openalex.org/W2390279801","https://openalex.org/W2233261550","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0,23],"effectiveness":[1,74,95,131],"of":[2,26,48,58,69,96,125,132,148,163],"dense":[3,102,133],"retrieval":[4,21,33,40,73,103,134,157,164],"models":[5,10,51,104,108],"trained":[6],"with":[7],"multilingual":[8,17,80,106],"language":[9,50,107],"as":[11,42,44,98,110],"backbones":[12],"has":[13],"been":[14],"demonstrated":[15],"in":[16,135,156,160],"and":[18,52,86,112],"cross-lingual":[19],"information":[20],"contexts.":[22],"optimal":[24],"choice":[25],"a":[27,31,99,153],"backbone":[28,100,150],"model":[29],"for":[30,75,101,117,138],"given":[32],"task":[34],"is":[35],"dependent":[36],"on":[37,72,129],"the":[38,45,56,67,87,94,123,130,145,149,161],"target":[39],"domain":[41,47,147],"well":[43],"pre-training":[46,146],"available":[49,170],"their":[53],"generalization":[54],"capabilities,":[55],"availability":[57],"relevance":[59],"judgements,":[60],"etc.":[61],"In":[62],"this":[63],"work,":[64],"we":[65,121],"study":[66],"impact":[68,124],"these":[70],"factors":[71],"African":[76,118,139],"languages":[77],"using":[78],"three":[79],"benchmark":[81],"datasets:":[82],"Mr.":[83],"TyDi,":[84],"MIRACL,":[85],"newly":[88],"released":[89],"CIRAL":[90],"dataset.":[91],"We":[92],"compare":[93],"mBERT":[97],"against":[105],"such":[109],"AfriBERTa":[111],"AfroXLMR,":[113],"which":[114],"are":[115,169],"specialized":[116],"languages.":[119,140],"Furthermore,":[120],"examine":[122],"different":[126,136],"training":[127,165],"regimes":[128],"domains":[137],"Our":[141],"findings":[142],"show":[143],"that":[144],"LM":[151],"plays":[152],"huge":[154],"role":[155],"effectiveness,":[158],"especially":[159],"absence":[162],"data.":[166],"Code":[167],"artifacts":[168],"at":[171],"https://github.com/castorini/afridpr_backbones.":[172]},"counts_by_year":[],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
