{"id":"https://openalex.org/W7160654293","doi":"https://doi.org/10.48550/arxiv.2605.05818","title":"LeakDojo: Decoding the Leakage Threats of RAG Systems","display_name":"LeakDojo: Decoding the Leakage Threats of RAG Systems","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160654293","doi":"https://doi.org/10.48550/arxiv.2605.05818"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.05818","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.05818","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.05818","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102754125","display_name":"Maosen Zhang","orcid":"https://orcid.org/0000-0002-7859-502X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Maosen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046878222","display_name":"Jianshuo Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Jianshuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135647771","display_name":"Boting Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Boting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135648125","display_name":"Wenyue Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wenyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135677684","display_name":"Xiaoping Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiaoping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135686935","display_name":"Tianwei Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Tianwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135696709","display_name":"Han Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Han","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.33079999685287476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.33079999685287476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.22630000114440918,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.12520000338554382,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leakage","display_name":"Leakage (economics)","score":0.7297000288963318},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6258999705314636},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5389999747276306},{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.4878999888896942},{"id":"https://openalex.org/keywords/information-leakage","display_name":"Information leakage","score":0.38850000500679016}],"concepts":[{"id":"https://openalex.org/C2777042071","wikidata":"https://www.wikidata.org/wiki/Q6509304","display_name":"Leakage (economics)","level":2,"score":0.7297000288963318},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6258999705314636},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6022999882698059},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5389999747276306},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.4878999888896942},{"id":"https://openalex.org/C2779201187","wikidata":"https://www.wikidata.org/wiki/Q2775060","display_name":"Information leakage","level":2,"score":0.38850000500679016},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.36390000581741333},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.3391000032424927},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2992999851703644},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2953999936580658},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2904999852180481},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.28940001130104065},{"id":"https://openalex.org/C2776841711","wikidata":"https://www.wikidata.org/wiki/Q856","display_name":"Barcode","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.25769999623298645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.05818","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.05818","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.05818","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.05818","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4410100281238556,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Retrieval-Augmented":[0],"Generation":[1],"(RAG)":[2],"enables":[3],"large":[4],"language":[5],"models":[6],"(LLMs)":[7],"to":[8,18,83],"leverage":[9],"external":[10],"knowledge,":[11],"but":[12],"also":[13],"exposes":[14],"valuable":[15],"RAG":[16,22,40,53,69,106,122],"databases":[17],"leakage":[19,41,87,100,111,123],"attacks.":[20],"As":[21],"systems":[23],"grow":[24],"more":[25],"complex":[26],"and":[27,67,78,102,120],"LLMs":[28],"exhibit":[29],"stronger":[30,94],"instruction-following":[31,95],"capabilities,":[32],"existing":[33,60],"studies":[34],"fall":[35],"short":[36],"of":[37,52],"systematically":[38],"assessing":[39],"risks.":[42],"We":[43],"present":[44],"LeakDojo,":[45,56],"a":[46],"configurable":[47],"framework":[48],"for":[49,118],"controlled":[50],"evaluation":[51],"leakage.":[54],"Using":[55],"we":[57],"benchmark":[58],"six":[59],"attacks":[61],"across":[62],"fourteen":[63],"LLMs,":[64],"four":[65],"datasets,":[66],"diverse":[68],"systems.":[70],"Our":[71,126],"study":[72],"reveals":[73],"that":[74],"(1)":[75],"query":[76],"generation":[77],"adversarial":[79],"instructions":[80],"contribute":[81],"independently":[82],"leakage,":[84],"with":[85,98],"overall":[86],"well":[88],"approximated":[89],"by":[90],"their":[91],"product;":[92],"(2)":[93],"capability":[96],"correlates":[97],"higher":[99],"risk;":[101],"(3)":[103],"improvements":[104],"in":[105,124],"faithfulness":[107],"can":[108],"introduce":[109],"increased":[110],"risk.":[112],"These":[113],"findings":[114],"provide":[115],"actionable":[116],"insights":[117],"understanding":[119],"mitigating":[121],"practice.":[125],"codebase":[127],"is":[128],"available":[129],"at":[130],"https://github.com/yeasen-z/LeakDojo.":[131]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-09T00:00:00"}
