{"id":"https://openalex.org/W4386488973","doi":"https://doi.org/10.1162/tacl_a_00595","title":"<b>MIRACL</b>: A Multilingual Retrieval Dataset Covering 18 Diverse Languages","display_name":"<b>MIRACL</b>: A Multilingual Retrieval Dataset Covering 18 Diverse Languages","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386488973","doi":"https://doi.org/10.1162/tacl_a_00595"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00595","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00595","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00595/2157340/tacl_a_00595.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00595/2157340/tacl_a_00595.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100390688","display_name":"Xinyu Zhang","orcid":"https://orcid.org/0000-0002-0037-1122"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Xinyu Zhang","raw_affiliation_strings":["David R. Cheriton School of Computer Science, University of Waterloo, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"David R. Cheriton School of Computer Science, University of Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052977545","display_name":"Nandan Thakur","orcid":"https://orcid.org/0000-0001-6107-2460"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Nandan Thakur","raw_affiliation_strings":["David R. Cheriton School of Computer Science, University of Waterloo, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"David R. Cheriton School of Computer Science, University of Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077701496","display_name":"Odunayo Ogundepo","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Odunayo Ogundepo","raw_affiliation_strings":["David R. Cheriton School of Computer Science, University of Waterloo, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"David R. Cheriton School of Computer Science, University of Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043026875","display_name":"Ehsan Kamalloo","orcid":"https://orcid.org/0000-0003-3081-8762"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ehsan Kamalloo","raw_affiliation_strings":["David R. Cheriton School of Computer Science, University of Waterloo, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"David R. Cheriton School of Computer Science, University of Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064431976","display_name":"David Alfonso-Hermelo","orcid":"https://orcid.org/0009-0009-4591-3077"},"institutions":[{"id":"https://openalex.org/I4210115038","display_name":"Huawei Technologies (Canada)","ror":"https://ror.org/026venb53","country_code":"CA","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210115038"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"David Alfonso-Hermelo","raw_affiliation_strings":["Huawei Noah\u2019s Ark Lab, Canada","Huawei Noah's Ark Lab, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah\u2019s Ark Lab, Canada","institution_ids":["https://openalex.org/I4210115038"]},{"raw_affiliation_string":"Huawei Noah's Ark Lab, Canada","institution_ids":["https://openalex.org/I4210115038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373874","display_name":"Xiaoguang Li","orcid":"https://orcid.org/0000-0003-4580-4865"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoguang Li","raw_affiliation_strings":["Huawei Noah\u2019s Ark Lab, China","Huawei Noah's Ark Lab, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah\u2019s Ark Lab, China","institution_ids":["https://openalex.org/I2250955327"]},{"raw_affiliation_string":"Huawei Noah's Ark Lab, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qun Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qun Liu","raw_affiliation_strings":["Huawei Noah\u2019s Ark Lab, China","Huawei Noah's Ark Lab, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah\u2019s Ark Lab, China","institution_ids":["https://openalex.org/I2250955327"]},{"raw_affiliation_string":"Huawei Noah's Ark Lab, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028862918","display_name":"Mehdi Rezagholizadeh","orcid":"https://orcid.org/0000-0003-4014-6007"},"institutions":[{"id":"https://openalex.org/I4210115038","display_name":"Huawei Technologies (Canada)","ror":"https://ror.org/026venb53","country_code":"CA","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210115038"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mehdi Rezagholizadeh","raw_affiliation_strings":["Huawei Noah\u2019s Ark Lab, Canada","Huawei Noah's Ark Lab, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah\u2019s Ark Lab, Canada","institution_ids":["https://openalex.org/I4210115038"]},{"raw_affiliation_string":"Huawei Noah's Ark Lab, Canada","institution_ids":["https://openalex.org/I4210115038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082997975","display_name":"Jimmy Lin","orcid":"https://orcid.org/0000-0002-0661-7189"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jimmy Lin","raw_affiliation_strings":["David R. Cheriton School of Computer Science, University of Waterloo, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"David R. Cheriton School of Computer Science, University of Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100390688"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":6.1086,"has_fulltext":true,"cited_by_count":37,"citation_normalized_percentile":{"value":0.97177438,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"11","issue":null,"first_page":"1114","last_page":"1131"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9783999919891357,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8558186888694763},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.606757402420044},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.5991615653038025},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5849469304084778},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5136975049972534},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5134328603744507},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4974232017993927},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49730589985847473},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4517658054828644},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3282570242881775}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8558186888694763},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.606757402420044},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.5991615653038025},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5849469304084778},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5136975049972534},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5134328603744507},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4974232017993927},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49730589985847473},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4517658054828644},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3282570242881775},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00595","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00595","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00595/2157340/tacl_a_00595.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:f9f3179fdabd48b6a3423c948d637631","is_oa":true,"landing_page_url":"https://doaj.org/article/f9f3179fdabd48b6a3423c948d637631","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 11 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00595","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00595","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00595/2157340/tacl_a_00595.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6600000262260437}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4386488973.pdf"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W587977456","https://openalex.org/W1583837637","https://openalex.org/W1779279021","https://openalex.org/W2006832571","https://openalex.org/W2038253946","https://openalex.org/W2074999873","https://openalex.org/W2108966388","https://openalex.org/W2160892561","https://openalex.org/W2164628107","https://openalex.org/W2251298241","https://openalex.org/W2810095012","https://openalex.org/W2890225082","https://openalex.org/W2892181857","https://openalex.org/W2899154813","https://openalex.org/W2912924812","https://openalex.org/W2951534261","https://openalex.org/W2952638691","https://openalex.org/W2963339397","https://openalex.org/W2963748441","https://openalex.org/W3014994955","https://openalex.org/W3021300761","https://openalex.org/W3021397474","https://openalex.org/W3035032094","https://openalex.org/W3035579820","https://openalex.org/W3045462440","https://openalex.org/W3045958725","https://openalex.org/W3092683697","https://openalex.org/W3099700870","https://openalex.org/W3100107515","https://openalex.org/W3100547398","https://openalex.org/W3101279614","https://openalex.org/W3104237552","https://openalex.org/W3152624702","https://openalex.org/W3154280800","https://openalex.org/W3169937871","https://openalex.org/W3180230246","https://openalex.org/W3200065025","https://openalex.org/W3217305727","https://openalex.org/W4205103289","https://openalex.org/W4226112939","https://openalex.org/W4226293366","https://openalex.org/W4252076394","https://openalex.org/W4284669756","https://openalex.org/W4285113563","https://openalex.org/W4287018294","https://openalex.org/W4300427681","https://openalex.org/W4320813768","https://openalex.org/W4384823501","https://openalex.org/W6758015726","https://openalex.org/W6764357534","https://openalex.org/W6770212971","https://openalex.org/W6779872132","https://openalex.org/W6801048709","https://openalex.org/W6811129754","https://openalex.org/W6849921291","https://openalex.org/W6852266077","https://openalex.org/W7054915730"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W1892467659","https://openalex.org/W1529400504","https://openalex.org/W1843462531","https://openalex.org/W1967370444","https://openalex.org/W2357241418","https://openalex.org/W2998403542","https://openalex.org/W2808586768","https://openalex.org/W2789919619","https://openalex.org/W2086064646"],"abstract_inverted_index":{"Abstract":[0],"MIRACL":[1,75,121,165],"is":[2,26,135,166],"a":[3,143],"multilingual":[4],"dataset":[5],"for":[6,54,152],"ad":[7],"hoc":[8],"retrieval":[9,31,141],"across":[10,142],"18":[11],"languages":[12,77],"that":[13,78,160],"collectively":[14],"encompass":[15],"over":[16,49,57],"three":[17],"billion":[18],"native":[19,69],"speakers":[20,70],"around":[21,126,155],"the":[22,34,37,41,112,156],"world.":[23],"This":[24],"resource":[25],"designed":[27],"to":[28,115,136],"support":[29],"monolingual":[30],"tasks,":[32],"where":[33,62],"queries":[35,56],"and":[36,91,106],"corpora":[38],"are":[39,79],"in":[40,59],"same":[42],"language.":[43],"In":[44,119],"total,":[45,120],"we":[46],"have":[47,65,161],"gathered":[48],"726k":[50],"high-quality":[51],"relevance":[52],"judgments":[53],"78k":[55],"Wikipedia":[58],"these":[60],"languages,":[61,146],"all":[63],"annotations":[64],"been":[66,163],"performed":[67,110],"by":[68,72],"hired":[71],"our":[73],"team.":[74],"covers":[76],"both":[80],"typologically":[81],"close":[82],"as":[83,85],"well":[84],"distant":[86],"from":[87],"10":[88],"language":[89],"families":[90],"13":[92],"sub-families,":[93],"associated":[94],"with":[95],"varying":[96],"amounts":[97],"of":[98,125,129,145],"publicly":[99],"available":[100,167],"resources.":[101],"Extensive":[102],"automatic":[103],"heuristic":[104],"verification":[105],"manual":[107],"assessments":[108],"were":[109],"during":[111],"annotation":[113],"process":[114],"control":[116],"data":[117],"quality.":[118],"represents":[122],"an":[123],"investment":[124],"five":[127],"person-years":[128],"human":[130],"annotator":[131],"effort.":[132],"Our":[133],"goal":[134],"spur":[137],"research":[138],"on":[139],"improving":[140],"continuum":[144],"thus":[147],"enhancing":[148],"information":[149],"access":[150],"capabilities":[151],"diverse":[153],"populations":[154],"world,":[157],"particularly":[158],"those":[159],"traditionally":[162],"underserved.":[164],"at":[168],"http://miracl.ai/.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":6}],"updated_date":"2026-05-28T09:10:13.091523","created_date":"2025-10-10T00:00:00"}
