{"id":"https://openalex.org/W7163077403","doi":"https://doi.org/10.48550/arxiv.2605.30917","title":"Inference-Free Multimodal Learned Sparse Retrieval for Production-Scale Visual Document Search","display_name":"Inference-Free Multimodal Learned Sparse Retrieval for Production-Scale Visual Document Search","publication_year":2026,"publication_date":"2026-05-29","ids":{"openalex":"https://openalex.org/W7163077403","doi":"https://doi.org/10.48550/arxiv.2605.30917"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.30917","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30917","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.30917","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137570982","display_name":"Gyu-Hwung Cho","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]},{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Cho, Gyu-Hwung","raw_affiliation_strings":["NAVER Corp., Republic of Korea","Seoul National University, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Corp., Republic of Korea","institution_ids":["https://openalex.org/I60922564"]},{"raw_affiliation_string":"Seoul National University, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007984753","display_name":"Youngjune Lee","orcid":"https://orcid.org/0009-0008-1997-4135"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Lee, Youngjune","raw_affiliation_strings":["NAVER Corp., Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Corp., Republic of Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113139052","display_name":"Kiyoon Jeong","orcid":null},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jeong, Kiyoon","raw_affiliation_strings":["NAVER Corp., Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Corp., Republic of Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137559755","display_name":"Siyoung Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Lee, Siyoung","raw_affiliation_strings":["NAVER Corp., Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Corp., Republic of Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073143756","display_name":"Sanggyu Han","orcid":"https://orcid.org/0009-0004-8571-1983"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Han, Sanggyu","raw_affiliation_strings":["NAVER Corp., Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER Corp., Republic of Korea","institution_ids":["https://openalex.org/I60922564"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137579280","display_name":"Herv\u00e9 Dejean","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dejean, Herv\u00e9","raw_affiliation_strings":["Naver Labs Europe, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Naver Labs Europe, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031052064","display_name":"St\u00e9phane Clinchant","orcid":"https://orcid.org/0000-0003-2367-8837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Clinchant, St\u00e9phane","raw_affiliation_strings":["Naver Labs Europe, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Naver Labs Europe, France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5137533314","display_name":"Seung-won Hwang","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hwang, Seung-won","raw_affiliation_strings":["Seoul National University, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8694999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8694999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.0778999999165535,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.016599999740719795,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5936999917030334},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5152999758720398},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5117999911308289},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.44760000705718994},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.4129999876022339},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.38530001044273376},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.3644999861717224},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.36390000581741333}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.840399980545044},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6141999959945679},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5936999917030334},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5629000067710876},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5152999758720398},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5117999911308289},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4966000020503998},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.44760000705718994},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.4129999876022339},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.38530001044273376},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3644999861717224},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.3517000079154968},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.335099995136261},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.30880001187324524},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C77637269","wikidata":"https://www.wikidata.org/wiki/Q7002051","display_name":"Neural coding","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.30917","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30917","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.30917","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30917","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4273194372653961}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"large-scale":[1],"visual-document":[2,14,93,171],"corpora":[3],"such":[4,96],"as":[5,154],"arXiv":[6],"papers":[7],"and":[8,105,181,206],"enterprise":[9],"PDFs":[10],"continue":[11],"to":[12,32,119,129,157,184,216],"grow,":[13],"retrieval":[15,45,101,172],"has":[16],"gained":[17],"increasing":[18],"attention;":[19],"yet":[20,108],"it":[21,196],"still":[22],"lacks":[23],"a":[24,120,147],"deployable":[25],"system":[26],"that":[27,150],"lexically":[28],"indexes":[29],"visual":[30,124],"documents":[31],"serve":[33],"queries":[34],"without":[35],"neural":[36,55],"encoding":[37,57,64],"at":[38,58,70,224],"scale.":[39],"Existing":[40],"methods":[41],"either":[42],"achieve":[43],"strong":[44],"quality":[46],"with":[47,65],"VLM-based":[48],"dense":[49,179,204],"or":[50,61,67,77,188],"multi-vector":[51],"models":[52],"but":[53],"require":[54],"query":[56,63],"serving":[59,83],"time,":[60],"avoid":[62],"OCR-":[66,187],"caption-based":[68,189],"BM25":[69,190],"the":[71,131,177,202],"cost":[72],"of":[73],"time-consuming":[74],"text":[75],"extraction":[76],"generation.":[78],"To":[79,138],"fill":[80],"this":[81,117,140,163],"missing":[82],"regime,":[84],"we":[85,142],"present":[86],"V-SPLADE,":[87],"an":[88,193],"inference-free":[89,97],"sparse":[90,100,125],"retriever":[91],"for":[92],"retrieval.":[94],"However,":[95],"multimodal":[98],"learned":[99],"systems":[102],"remain":[103],"underexplored":[104],"have":[106],"not":[107],"shown":[109],"dense-level":[110],"effectiveness":[111],"under":[112],"high":[113],"sparsity.":[114],"We":[115],"attribute":[116],"limitation":[118],"lexical":[121,132,155],"grounding":[122],"problem:":[123],"representations":[126],"often":[127],"fail":[128],"capture":[130],"content":[133],"embedded":[134],"in":[135],"document":[136],"images.":[137],"address":[139],"problem,":[141],"introduce":[143],"caption-gated":[144],"token":[145],"supervision,":[146,164],"training-only":[148],"signal":[149],"uses":[151],"VLM-generated":[152],"captions":[153],"cues":[156],"activate":[158],"retrieval-relevant":[159],"vocabulary":[160],"dimensions.":[161],"With":[162],"V-SPLADE":[165],"improves":[166,208],"average":[167],"NDCG@5":[168],"across":[169],"six":[170],"benchmarks":[173],"by":[174,182,214],"+13.8pp":[175],"over":[176,186,201],"same-scale":[178,203],"baseline":[180,205],"up":[183,215],"+6.3pp":[185],"baselines.":[191],"On":[192],"18.7M-document":[194],"corpus,":[195],"more":[197],"than":[198],"doubles":[199],"R@5":[200],"further":[207],"competing":[209],"retrievers":[210],"through":[211],"score":[212],"fusion":[213],"+2.4pp":[217],"R@5.":[218],"Code":[219],"will":[220],"be":[221],"released":[222],"soon":[223],"https://github.com/naver/v-splade.":[225]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-06-02T00:00:00"}
