{"id":"https://openalex.org/W7106668415","doi":"https://doi.org/10.48550/arxiv.2511.19324","title":"What Drives Cross-lingual Ranking? Retrieval Approaches with Multilingual Language Models","display_name":"What Drives Cross-lingual Ranking? Retrieval Approaches with Multilingual Language Models","publication_year":2025,"publication_date":"2025-11-24","ids":{"openalex":"https://openalex.org/W7106668415","doi":"https://doi.org/10.48550/arxiv.2511.19324"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2511.19324","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.19324","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2511.19324","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Goworek, Roksana","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Goworek, Roksana","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Macmillan-Scott, Olivia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Macmillan-Scott, Olivia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"\u00d6zyi\u011fit, Eda B.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"\u00d6zyi\u011fit, Eda B.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.970300018787384,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.970300018787384,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.006300000008195639,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.004900000058114529,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5033000111579895},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4848000109195709},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.46309998631477356},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4424999952316284},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.44110000133514404},{"id":"https://openalex.org/keywords/multilingualism","display_name":"Multilingualism","score":0.41510000824928284},{"id":"https://openalex.org/keywords/cross-language-information-retrieval","display_name":"Cross-language information retrieval","score":0.3939000070095062},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.3716999888420105}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8133000135421753},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6362000107765198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5978999733924866},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5033000111579895},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4848000109195709},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.46309998631477356},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4424999952316284},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.44110000133514404},{"id":"https://openalex.org/C2780035574","wikidata":"https://www.wikidata.org/wiki/Q30081","display_name":"Multilingualism","level":2,"score":0.41510000824928284},{"id":"https://openalex.org/C2778842860","wikidata":"https://www.wikidata.org/wiki/Q986551","display_name":"Cross-language information retrieval","level":3,"score":0.3939000070095062},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3930000066757202},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3716999888420105},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.359499990940094},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.3346000015735626},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2511.19324","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.19324","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2511.19324","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.19324","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6288508772850037,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Cross-lingual":[0],"information":[1],"retrieval":[2,34,56,79],"(CLIR)":[3],"enables":[4],"access":[5],"to":[6,13],"multilingual":[7,54,158],"knowledge":[8],"but":[9,117],"remains":[10],"challenging":[11],"due":[12],"disparities":[14],"in":[15,23],"resources,":[16],"scripts,":[17],"and":[18,32,40,65,68,90,102,112,137,145,160,170],"weak":[19,109],"cross-lingual":[20,152],"semantic":[21,157],"alignment":[22,163],"embedding":[24],"models.":[25],"Existing":[26],"pipelines":[27],"often":[28],"rely":[29],"on":[30,119],"translation":[31],"monolingual":[33],"heuristics,":[35],"which":[36],"add":[37],"computational":[38],"overhead":[39],"noise,":[41],"degrading":[42],"performance.":[43],"This":[44],"work":[45],"systematically":[46],"evaluates":[47],"four":[48],"intervention":[49],"types,":[50],"namely":[51],"document":[52,95],"translation,":[53],"dense":[55,78],"with":[57,108],"pretrained":[58],"encoders,":[59],"contrastive":[60],"learning":[61,98],"at":[62],"word,":[63],"phrase,":[64],"query-document":[66],"levels,":[67],"cross-encoder":[69,124],"re-ranking,":[70],"across":[71],"three":[72],"benchmark":[73],"datasets.":[74],"We":[75],"find":[76],"that":[77,151],"models":[80],"trained":[81],"specifically":[82],"for":[83,106,143,168],"CLIR":[84],"consistently":[85],"outperform":[86],"lexical":[87,136],"matching":[88],"methods":[89],"derive":[91],"little":[92],"benefit":[93],"from":[94],"translation.":[96],"Contrastive":[97],"mitigates":[99],"language":[100],"biases":[101],"yields":[103],"substantial":[104],"improvements":[105],"encoders":[107],"initial":[110],"alignment,":[111],"re-ranking":[113],"can":[114],"be":[115],"effective,":[116],"depends":[118],"the":[120,123],"quality":[121],"of":[122],"training":[125],"data.":[126],"Although":[127],"high-resource":[128],"languages":[129],"still":[130],"dominate":[131],"overall":[132],"performance,":[133],"gains":[134],"over":[135,164],"document-translated":[138],"baselines":[139],"are":[140],"most":[141],"pronounced":[142],"low-resource":[144],"cross-script":[146,169],"pairs.":[147],"These":[148],"findings":[149],"indicate":[150],"search":[153],"systems":[154],"should":[155],"prioritise":[156],"embeddings":[159],"targeted":[161],"learning-based":[162],"translation-based":[165],"pipelines,":[166],"particularly":[167],"under-resourced":[171],"languages.":[172]},"counts_by_year":[],"updated_date":"2025-11-27T01:16:37.896743","created_date":"2025-11-27T00:00:00"}
