{"id":"https://openalex.org/W4415428429","doi":"https://doi.org/10.3233/faia251293","title":"Pirates of the RAG: Adaptively Attacking LLMs to Leak Knowledge Bases","display_name":"Pirates of the RAG: Adaptively Attacking LLMs to Leak Knowledge Bases","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428429","doi":"https://doi.org/10.3233/faia251293"},"language":"en","primary_location":{"id":"doi:10.3233/faia251293","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251293","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251293","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108719287","display_name":"C. Maio","orcid":null},"institutions":[{"id":"https://openalex.org/I108290504","display_name":"University of Pisa","ror":"https://ror.org/03ad39j10","country_code":"IT","type":"education","lineage":["https://openalex.org/I108290504"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Christian Di Maio","raw_affiliation_strings":["University of Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"University of Pisa, Italy","institution_ids":["https://openalex.org/I108290504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115647623","display_name":"Cristian Cosci","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128559","display_name":"Reply (Italy)","ror":"https://ror.org/03ma9mt41","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210128559"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Cristian Cosci","raw_affiliation_strings":["Machine Learning Reply, Turin, Italy"],"affiliations":[{"raw_affiliation_string":"Machine Learning Reply, Turin, Italy","institution_ids":["https://openalex.org/I4210128559"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080411641","display_name":"Marco Maggini","orcid":"https://orcid.org/0000-0002-6428-1265"},"institutions":[{"id":"https://openalex.org/I102064193","display_name":"University of Siena","ror":"https://ror.org/01tevnk56","country_code":"IT","type":"education","lineage":["https://openalex.org/I102064193"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco Maggini","raw_affiliation_strings":["University of Siena, Italy"],"affiliations":[{"raw_affiliation_string":"University of Siena, Italy","institution_ids":["https://openalex.org/I102064193"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089228629","display_name":"Valentina Poggioni","orcid":"https://orcid.org/0000-0002-7691-7478"},"institutions":[{"id":"https://openalex.org/I27483092","display_name":"University of Perugia","ror":"https://ror.org/00x27da85","country_code":"IT","type":"education","lineage":["https://openalex.org/I27483092"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Valentina Poggioni","raw_affiliation_strings":["University of Perugia, Italy"],"affiliations":[{"raw_affiliation_string":"University of Perugia, Italy","institution_ids":["https://openalex.org/I27483092"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006137186","display_name":"Stefano Melacci","orcid":"https://orcid.org/0000-0002-0415-0888"},"institutions":[{"id":"https://openalex.org/I102064193","display_name":"University of Siena","ror":"https://ror.org/01tevnk56","country_code":"IT","type":"education","lineage":["https://openalex.org/I102064193"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Stefano Melacci","raw_affiliation_strings":["University of Siena, Italy"],"affiliations":[{"raw_affiliation_string":"University of Siena, Italy","institution_ids":["https://openalex.org/I102064193"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5108719287"],"corresponding_institution_ids":["https://openalex.org/I108290504"],"apc_list":null,"apc_paid":null,"fwci":5.7836,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.96533656,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.8471999764442444,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.8471999764442444,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10927","display_name":"Access Control and Trust","score":0.8313999772071838,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.8303999900817871,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.45590001344680786},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.44749999046325684},{"id":"https://openalex.org/keywords/leak","display_name":"Leak","score":0.4431999921798706},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4410000145435333},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4226999878883362},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.41920000314712524},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.37059998512268066}],"concepts":[{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.7310000061988831},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.592199981212616},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.45590001344680786},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.45239999890327454},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.44749999046325684},{"id":"https://openalex.org/C2780378346","wikidata":"https://www.wikidata.org/wiki/Q1349983","display_name":"Leak","level":2,"score":0.4431999921798706},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4410000145435333},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4226999878883362},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.41920000314712524},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.37059998512268066},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C99221444","wikidata":"https://www.wikidata.org/wiki/Q1532069","display_name":"Private information retrieval","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.3280999958515167},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.31450000405311584},{"id":"https://openalex.org/C2987355568","wikidata":"https://www.wikidata.org/wiki/Q4420957","display_name":"Leak detection","level":3,"score":0.29989999532699585},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25949999690055847},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.25279998779296875},{"id":"https://openalex.org/C2778717966","wikidata":"https://www.wikidata.org/wiki/Q4189076","display_name":"Protection mechanism","level":3,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3233/faia251293","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251293","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},{"id":"pmh:oai:usiena-air.unisi.it:11365/1308354","is_oa":false,"landing_page_url":"https://hdl.handle.net/11365/1308354","pdf_url":null,"source":{"id":"https://openalex.org/S4377196319","display_name":"Use Siena air (University of Siena)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I102064193","host_organization_name":"University of Siena","host_organization_lineage":["https://openalex.org/I102064193"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.3233/faia251293","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251293","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,143],"growing":[1],"ubiquity":[2],"of":[3,25,52,94,100,110,162],"Retrieval-Augmented":[4],"Generation":[5],"(RAG)":[6],"systems":[7],"in":[8,114,157],"several":[9],"real-world":[10],"services":[11],"triggers":[12],"severe":[13,48],"concerns":[14],"about":[15],"their":[16],"security.":[17],"A":[18,83],"RAG":[19,66,116,163],"system":[20,67],"improves":[21],"the":[22,92,101,108,111,149,158,168],"generative":[23],"capabilities":[24],"a":[26,32,38,60,65],"Large":[27],"Language":[28],"Model":[29],"(LLM)":[30],"by":[31],"retrieval":[33],"mechanism":[34,85],"that":[35],"operates":[36],"on":[37,140],"private":[39,53,71],"knowledge":[40,72,103],"base,":[41],"whose":[42],"unintended":[43],"exposure":[44],"could":[45],"lead":[46],"to":[47,63,68,97,121,129],"consequences,":[49],"including":[50],"breaches":[51],"and":[54,81,86,118,160],"sensitive":[55],"information.":[56],"This":[57],"paper":[58],"presents":[59],"black-box":[61],"attack":[62],"force":[64],"leak":[69,98],"its":[70],"base":[73],"which,":[74],"unlike":[75],"existing":[76],"approaches,":[77,125],"is":[78],"both":[79],"adaptive":[80],"automatic.":[82],"relevance-based":[84],"an":[87],"attacker-side":[88],"open-source":[89,141,169],"LLM":[90],"favor":[91],"generation":[93],"effective":[95],"queries":[96],"most":[99],"(hidden)":[102],"base.":[104],"Extensive":[105],"experimentation":[106],"proves":[107],"quality":[109],"proposed":[112],"algorithm":[113],"different":[115],"pipelines":[117],"domains,":[119],"compared":[120],"very":[122],"recent":[123],"related":[124],"which":[126],"turn":[127],"out":[128],"be":[130],"either":[131],"not":[132,135,138],"fully":[133],"black-box,":[134],"adaptive,":[136],"or":[137],"based":[139],"models.":[142],"findings":[144],"from":[145],"our":[146,172],"study":[147],"highlight":[148],"urgent":[150],"need":[151],"for":[152,171,176],"more":[153],"robust":[154],"privacy":[155],"safeguards":[156],"design":[159],"deployment":[161],"systems.":[164],"We":[165],"have":[166],"made":[167],"code":[170],"experimental":[173],"procedure":[174],"available":[175],"public":[177],"use":[178],"[12].":[179]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
