{"id":"https://openalex.org/W7161135670","doi":"https://doi.org/10.48550/arxiv.2605.13052","title":"RAG-Enhanced Large Language Models for Dynamic Content Expiration Prediction in Web Search","display_name":"RAG-Enhanced Large Language Models for Dynamic Content Expiration Prediction in Web Search","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7161135670","doi":"https://doi.org/10.48550/arxiv.2605.13052"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.13052","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13052","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.13052","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136153767","display_name":"Tingyu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Tingyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136165563","display_name":"Wenkai Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Wenkai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047801985","display_name":"Li Gao","orcid":"https://orcid.org/0009-0001-2371-4351"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136117843","display_name":"Lixin Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Lixin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136130146","display_name":"Ge Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ge","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136095554","display_name":"Dawei Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Dawei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136151209","display_name":"Daiting Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Daiting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.8935999870300293,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.8935999870300293,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10737","display_name":"Health Literacy and Information Accessibility","score":0.021900000050663948,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.012299999594688416,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6402000188827515},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4390000104904175},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41019999980926514},{"id":"https://openalex.org/keywords/user-generated-content","display_name":"User-generated content","score":0.40799999237060547},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.39890000224113464},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.3862999975681305},{"id":"https://openalex.org/keywords/expiration-date","display_name":"Expiration date","score":0.32330000400543213},{"id":"https://openalex.org/keywords/boundary","display_name":"Boundary (topology)","score":0.3046000003814697}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7760000228881836},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6402000188827515},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.46050000190734863},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4390000104904175},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41019999980926514},{"id":"https://openalex.org/C101293273","wikidata":"https://www.wikidata.org/wiki/Q579716","display_name":"User-generated content","level":3,"score":0.40799999237060547},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.39890000224113464},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3862999975681305},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3529999852180481},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3434000015258789},{"id":"https://openalex.org/C2780584464","wikidata":"https://www.wikidata.org/wiki/Q898931","display_name":"Expiration date","level":2,"score":0.32330000400543213},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3037000000476837},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.30320000648498535},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.290800005197525},{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2702000141143799},{"id":"https://openalex.org/C2776324614","wikidata":"https://www.wikidata.org/wiki/Q3948731","display_name":"Web content","level":3,"score":0.26409998536109924},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.263700008392334},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C2777851325","wikidata":"https://www.wikidata.org/wiki/Q7094102","display_name":"Online model","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C92446256","wikidata":"https://www.wikidata.org/wiki/Q3306762","display_name":"Data validation","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C53605480","wikidata":"https://www.wikidata.org/wiki/Q852595","display_name":"Geotagging","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2554999887943268}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.13052","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13052","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.13052","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13052","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.41377362608909607,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"commercial":[1],"web":[2],"search,":[3,62],"aligning":[4],"content":[5,33],"freshness":[6,129],"with":[7,100],"user":[8,97,131],"intent":[9],"remains":[10],"challenging":[11],"due":[12],"to":[13,82,105],"the":[14,43,135],"highly":[15],"varied":[16],"lifespans":[17],"of":[18,137],"information.":[19],"Traditional":[20],"industrial":[21,146],"approaches":[22],"rely":[23],"on":[24,96,119],"static":[25],"time-window":[26],"filtering,":[27],"resulting":[28],"in":[29,60,127],"\"one-size-fits-all\"":[30],"rankings":[31],"where":[32],"may":[34],"be":[35],"chronologically":[36],"recent":[37],"but":[38],"semantically":[39],"expired.":[40],"To":[41],"address":[42],"limitation,":[44],"we":[45],"present":[46],"a":[47,66,84],"novel":[48],"Large":[49],"Language":[50],"Models":[51],"(LLMs)-based":[52],"Query-Aware":[53],"Dynamic":[54],"Content":[55],"Expiration":[56],"Prediction":[57],"Framework":[58],"deployed":[59],"Baidu":[61],"reformulating":[63],"timeliness":[64],"as":[65],"dynamic":[67],"validity":[68],"inference":[69],"task.":[70],"Our":[71],"framework":[72],"extracts":[73],"fine-grained":[74],"temporal":[75],"contexts":[76],"from":[77],"documents":[78],"and":[79,115,130],"leverages":[80],"LLMs":[81],"deduce":[83],"query-specific":[85],"\"validity":[86],"horizon\"-a":[87],"semantic":[88,142],"boundary":[89],"defining":[90],"when":[91],"information":[92],"becomes":[93],"obsolete":[94],"based":[95],"intent.":[98],"Integrated":[99],"robust":[101],"hallucination":[102],"mitigation":[103],"strategies":[104],"ensure":[106],"reliability,":[107],"our":[108],"approach":[109],"has":[110],"been":[111],"evaluated":[112],"through":[113],"offline":[114],"online":[116],"A/B":[117],"testing":[118],"live":[120],"production":[121],"traffic.":[122],"Results":[123],"demonstrate":[124],"significant":[125],"improvements":[126],"search":[128],"experience":[132],"metrics,":[133],"validating":[134],"effectiveness":[136],"LLM-driven":[138],"reasoning":[139],"for":[140],"solving":[141],"expiration":[143],"at":[144],"an":[145],"scale.":[147]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-15T00:00:00"}
