{"id":"https://openalex.org/W7133662660","doi":"https://doi.org/10.48550/arxiv.2603.04238","title":"Retrieval or Representation? Reassessing Benchmark Gaps in Multilingual and Visually Rich RAG","display_name":"Retrieval or Representation? Reassessing Benchmark Gaps in Multilingual and Visually Rich RAG","publication_year":2026,"publication_date":"2026-03-04","ids":{"openalex":"https://openalex.org/W7133662660","doi":"https://doi.org/10.48550/arxiv.2603.04238"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.04238","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004304345","display_name":"Martin Asenov","orcid":"https://orcid.org/0000-0003-4610-3112"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Asenov, Martin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114601845","display_name":"Kenza Benkirane","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benkirane, Kenza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064040711","display_name":"Dan Goldwater","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goldwater, Dan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5083561499","display_name":"Aneiss Ghodsi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghodsi, Aneiss","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8514000177383423,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8514000177383423,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.05009999871253967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.021299999207258224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6220999956130981},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.608299970626831},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.5227000117301941},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5194000005722046},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.423799991607666},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4092999994754791},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4065999984741211},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.36719998717308044}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8220999836921692},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6220999956130981},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.608299970626831},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5583999752998352},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.551800012588501},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5271000266075134},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.5227000117301941},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5194000005722046},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.423799991607666},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4065999984741211},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C551230270","wikidata":"https://www.wikidata.org/wiki/Q4368942","display_name":"Data retrieval","level":2,"score":0.3495999872684479},{"id":"https://openalex.org/C44083865","wikidata":"https://www.wikidata.org/wiki/Q3853443","display_name":"Mean reciprocal rank","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C2985933255","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Text retrieval","level":2,"score":0.2922999858856201},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.27619999647140503},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26420000195503235}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.04238","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.04238","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.04238","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.04238","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.6340810060501099,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Retrieval-augmented":[0],"generation":[1],"(RAG)":[2],"is":[3,64],"a":[4],"common":[5],"way":[6],"to":[7,114],"ground":[8],"language":[9],"models":[10],"in":[11],"external":[12],"documents":[13,29,53],"and":[14,75,94,108,118],"up-to-date":[15],"information.":[16],"Classical":[17],"retrieval":[18,81,109],"systems":[19],"relied":[20],"on":[21,40,92],"lexical":[22],"methods":[23,77],"such":[24],"as":[25],"BM25,":[26],"which":[27],"rank":[28],"by":[30],"term":[31],"overlap":[32],"with":[33,54],"corpus-level":[34],"weighting.":[35],"End-to-end":[36],"multimodal":[37],"retrievers":[38],"trained":[39],"large":[41,90],"query-document":[42],"datasets":[43],"claim":[44],"substantial":[45],"improvements":[46],"over":[47],"these":[48],"approaches,":[49],"especially":[50],"for":[51,100],"multilingual":[52,93],"complex":[55],"visual":[56,95],"layouts.":[57],"We":[58],"demonstrate":[59,85],"that":[60,86,104],"better":[61],"document":[62],"representation":[63],"the":[65,80,112],"primary":[66],"driver":[67],"of":[68],"benchmark":[69],"improvements.":[70],"By":[71],"systematically":[72],"varying":[73],"transcription":[74,107],"preprocessing":[76],"while":[78],"holding":[79],"mechanism":[82],"fixed,":[83],"we":[84],"BM25":[87],"can":[88],"recover":[89],"gaps":[91],"benchmarks.":[96],"Our":[97],"findings":[98],"call":[99],"decomposed":[101],"evaluation":[102],"benchmarks":[103],"separately":[105],"measure":[106],"capabilities,":[110],"enabling":[111],"field":[113],"correctly":[115],"attribute":[116],"progress":[117],"focus":[119],"effort":[120],"where":[121],"it":[122],"matters.":[123]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-06T00:00:00"}
