{"id":"https://openalex.org/W4392384312","doi":"https://doi.org/10.1145/3616855.3635691","title":"Vector Search with OpenAI Embeddings: Lucene Is All You Need","display_name":"Vector Search with OpenAI Embeddings: Lucene Is All You Need","publication_year":2024,"publication_date":"2024-03-04","ids":{"openalex":"https://openalex.org/W4392384312","doi":"https://doi.org/10.1145/3616855.3635691"},"language":"en","primary_location":{"id":"doi:10.1145/3616855.3635691","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3616855.3635691","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036134309","display_name":"Jasper Xian","orcid":"https://orcid.org/0009-0004-2740-6120"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Jasper Xian","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083417786","display_name":"Tommaso Teofili","orcid":"https://orcid.org/0000-0002-4372-0273"},"institutions":[{"id":"https://openalex.org/I119003972","display_name":"Roma Tre University","ror":"https://ror.org/05vf0dg29","country_code":"IT","type":"education","lineage":["https://openalex.org/I119003972"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Tommaso Teofili","raw_affiliation_strings":["Roma Tre University, Rome, Italy"],"affiliations":[{"raw_affiliation_string":"Roma Tre University, Rome, Italy","institution_ids":["https://openalex.org/I119003972"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101480198","display_name":"Ronak Pradeep","orcid":"https://orcid.org/0000-0001-6296-601X"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ronak Pradeep","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082997975","display_name":"Jimmy Lin","orcid":"https://orcid.org/0000-0002-0661-7189"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jimmy Lin","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036134309"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":9.8794,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.98421209,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1090","last_page":"1093"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.729127049446106},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.6078138947486877},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5467650890350342},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.48322948813438416},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.476661741733551},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3453952670097351},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33426064252853394},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.112619549036026},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.0760774314403534}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.729127049446106},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.6078138947486877},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5467650890350342},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48322948813438416},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.476661741733551},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3453952670097351},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33426064252853394},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.112619549036026},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0760774314403534},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3616855.3635691","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3616855.3635691","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6499999761581421,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G2165548363","display_name":null,"funder_award_id":"Canada","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G8284766523","display_name":null,"funder_award_id":"(NSERC)","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2114303224","https://openalex.org/W2899154813","https://openalex.org/W2963469388","https://openalex.org/W2998702515","https://openalex.org/W3099700870","https://openalex.org/W3154670582","https://openalex.org/W3184918446","https://openalex.org/W4213045215","https://openalex.org/W4284664419","https://openalex.org/W4284682639","https://openalex.org/W4382449327","https://openalex.org/W4385570290","https://openalex.org/W4385570706","https://openalex.org/W4387846582","https://openalex.org/W6600263792","https://openalex.org/W6601548533"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2118564381","https://openalex.org/W2163901716","https://openalex.org/W2152204162","https://openalex.org/W2739821120","https://openalex.org/W2150136235","https://openalex.org/W2026095310","https://openalex.org/W2140661912","https://openalex.org/W2056806613","https://openalex.org/W2153069032"],"abstract_inverted_index":{"We":[0],"provide":[1,73],"a":[2,36,78,86,96,101,106],"reproducible,":[3],"end-to-end":[4],"demonstration":[5],"of":[6,26,45],"vector":[7,38,74,103],"search":[8,75],"with":[9],"OpenAI":[10],"embeddings":[11],"using":[12],"Lucene":[13,69],"on":[14],"the":[15,32,57],"popular":[16],"MS":[17],"MARCO":[18],"passage":[19],"ranking":[20],"test":[21],"collection.":[22],"The":[23],"main":[24],"goal":[25],"our":[27],"work":[28],"is":[29,40],"to":[30,42,54,72,94,99],"challenge":[31],"prevailing":[33],"narrative":[34],"that":[35,61],"dedicated":[37,102],"store":[39,104],"necessary":[41],"take":[43],"advantage":[44],"recent":[46],"advances":[47],"in":[48,68,77,120],"deep":[49],"neural":[50],"networks":[51],"as":[52],"applied":[53],"search.":[55],"Quite":[56],"contrary,":[58],"we":[59],"show":[60],"hierarchical":[62],"navigable":[63],"small-world":[64],"network":[65],"(HNSW)":[66],"indexes":[67],"are":[70],"adequate":[71],"capabilities":[76],"standard":[79],"bi-encoder":[80],"architecture.":[81],"This":[82],"suggests":[83],"that,":[84],"from":[85],"simple":[87],"cost-benefit":[88],"analysis,":[89],"there":[90],"does":[91],"not":[92],"appear":[93],"be":[95],"compelling":[97],"reason":[98],"introduce":[100],"into":[105],"modern":[107],"\"AI":[108],"stack\"":[109],"for":[110],"search,":[111],"since":[112],"such":[113],"applications":[114],"have":[115],"already":[116],"received":[117],"substantial":[118],"investments":[119],"existing,":[121],"widely":[122],"deployed":[123],"infrastructure.":[124]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":17},{"year":2024,"cited_by_count":9}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
