{"id":"https://openalex.org/W4411638747","doi":"https://doi.org/10.18653/v1/2024.eacl-short.34","title":"Corpus-Steered Query Expansion with Large Language Models","display_name":"Corpus-Steered Query Expansion with Large Language Models","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4411638747","doi":"https://doi.org/10.18653/v1/2024.eacl-short.34"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2024.eacl-short.34","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.eacl-short.34","pdf_url":"https://aclanthology.org/2024.eacl-short.34.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2024.eacl-short.34.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080310094","display_name":"Yibin Lei","orcid":"https://orcid.org/0009-0007-9558-5548"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yibin Lei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103124835","display_name":"Yu Cao","orcid":"https://orcid.org/0000-0002-2630-2475"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu Cao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039076312","display_name":"Tianyi Zhou","orcid":"https://orcid.org/0000-0001-5348-0632"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tianyi Zhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100611243","display_name":"Tao Shen","orcid":"https://orcid.org/0000-0003-3315-2468"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Shen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059489981","display_name":"Andrew Yates","orcid":"https://orcid.org/0000-0002-5970-880X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andrew Yates","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1383,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.89697984,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"393","last_page":"401"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.8852999806404114,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.8852999806404114,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8517000079154968,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8367000222206116,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7719016075134277},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.7117595076560974},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.5003225803375244},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.48268961906433105},{"id":"https://openalex.org/keywords/rdf-query-language","display_name":"RDF query language","score":0.4711277186870575},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4145865738391876},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38535276055336},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3058580756187439},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.30360132455825806},{"id":"https://openalex.org/keywords/web-query-classification","display_name":"Web query classification","score":0.16760694980621338},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.10676002502441406}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7719016075134277},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.7117595076560974},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.5003225803375244},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.48268961906433105},{"id":"https://openalex.org/C96956885","wikidata":"https://www.wikidata.org/wiki/Q6138701","display_name":"RDF query language","level":5,"score":0.4711277186870575},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4145865738391876},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38535276055336},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3058580756187439},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.30360132455825806},{"id":"https://openalex.org/C118689300","wikidata":"https://www.wikidata.org/wiki/Q7978614","display_name":"Web query classification","level":4,"score":0.16760694980621338},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.10676002502441406}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.18653/v1/2024.eacl-short.34","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.eacl-short.34","pdf_url":"https://aclanthology.org/2024.eacl-short.34.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)","raw_type":"proceedings-article"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/46a931b3-1a05-4002-9401-739f058c66e2","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/corpussteered-query-expansion-with-large-language-models(46a931b3-1a05-4002-9401-739f058c66e2).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lei, Y, Cao, Y, Zhou, T, Shen, T & Yates, A 2024, Corpus-Steered Query Expansion with Large Language Models. in Y Graham & M Purver (eds), The 18th Conference of the European Chapter of the Association for Computational Linguistics : proceedings of the conference : EACL 2024 : March 17-22, 2024. vol. 2, Kerrville, TX, pp. 393-401, 18th Conference of the European Chapter of the Association for Computational Linguistics, EACL 2024, St. Julian\ufffds, Malta, 17/03/24. https://doi.org/10.18653/v1/2024.eacl-short.34","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:dare.uva.nl:publications/46a931b3-1a05-4002-9401-739f058c66e2","is_oa":true,"landing_page_url":"https://hdl.handle.net/11245.1/46a931b3-1a05-4002-9401-739f058c66e2","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Lei, Y, Cao, Y, Zhou, T, Shen, T & Yates, A 2024, Corpus-Steered Query Expansion with Large Language Models. in Y Graham & M Purver (eds), The 18th Conference of the European Chapter of the Association for Computational Linguistics : proceedings of the conference : EACL 2024 : March 17-22, 2024. vol. 2, Kerrville, TX, pp. 393-401, 18th Conference of the European Chapter of the Association for Computational Linguistics, EACL 2024, St. Julian\ufffds, Malta, 17/03/24. https://doi.org/10.18653/v1/2024.eacl-short.34","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.18653/v1/2024.eacl-short.34","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.eacl-short.34","pdf_url":"https://aclanthology.org/2024.eacl-short.34.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5400000214576721,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411638747.pdf","grobid_xml":"https://content.openalex.org/works/W4411638747.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2096359267","https://openalex.org/W2572349046","https://openalex.org/W4256058599","https://openalex.org/W2392799717","https://openalex.org/W3125756434","https://openalex.org/W2146885082","https://openalex.org/W2026738364","https://openalex.org/W2017989738","https://openalex.org/W2970853428","https://openalex.org/W4385573081"],"abstract_inverted_index":{"Recent":[0],"studies":[1],"demonstrate":[2],"that":[3,22,119],"query":[4,100,112],"expansions":[5,34],"generated":[6],"by":[7,18,55],"large":[8],"language":[9],"models":[10],"(LLMs)":[11],"can":[12],"considerably":[13],"enhance":[14],"information":[15,46],"retrieval":[16,37],"systems":[17],"generating":[19],"hypothetical":[20],"documents":[21],"answer":[23],"the":[24,33,36,49,68,74,77,89,99,107,111,114],"queries":[25,130],"as":[26],"expansions.However,":[27],"challenges":[28],"arise":[29],"from":[30],"misalignments":[31],"between":[32,110],"and":[35,44,113],"corpus,":[38],"resulting":[39],"in":[40,88],"issues":[41],"like":[42],"hallucinations":[43],"outdated":[45],"due":[47],"to":[48,66,83,97],"limited":[50],"intrinsic":[51],"knowledge":[52,71],"of":[53,70,81],"LLMs.Inspired":[54],"Pseudo":[56],"Relevance":[57],"Feedback":[58],"(PRF),":[59],"we":[60],"introduce":[61],"Corpus-Steered":[62],"Query":[63],"Expansion":[64],"(CSQE)":[65],"promote":[67],"incorporation":[69],"embedded":[72],"within":[73],"corpus.CSQE":[75],"utilizes":[76],"relevance":[78,108],"assessing":[79],"capability":[80],"LLMs":[82,133],"systematically":[84],"identify":[85],"pivotal":[86],"sentences":[87],"initially-retrieved":[90],"documents.These":[91],"corpusoriginated":[92],"texts":[93],"are":[94],"subsequently":[95],"used":[96],"expand":[98],"together":[101],"with":[102,129],"LLM-knowledge":[103],"empowered":[104],"expansions,":[105],"improving":[106],"prediction":[109],"target":[115],"documents.Extensive":[116],"experiments":[117],"reveal":[118],"CSQE":[120],"exhibits":[121],"strong":[122],"performance":[123],"without":[124],"necessitating":[125],"any":[126],"training,":[127],"especially":[128],"for":[131],"which":[132],"lack":[134],"knowledge.":[135],"1":[136]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
