{"id":"https://openalex.org/W4385679821","doi":"https://doi.org/10.1109/sp46215.2023.10179300","title":"Analyzing Leakage of Personally Identifiable Information in Language Models","display_name":"Analyzing Leakage of Personally Identifiable Information in Language Models","publication_year":2023,"publication_date":"2023-05-01","ids":{"openalex":"https://openalex.org/W4385679821","doi":"https://doi.org/10.1109/sp46215.2023.10179300"},"language":"en","primary_location":{"id":"doi:10.1109/sp46215.2023.10179300","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sp46215.2023.10179300","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Symposium on Security and Privacy (SP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086633938","display_name":"Nils Lukas","orcid":"https://orcid.org/0009-0001-5891-9154"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Nils Lukas","raw_affiliation_strings":["University of Waterloo"],"affiliations":[{"raw_affiliation_string":"University of Waterloo","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103045221","display_name":"Ahmed Salem","orcid":"https://orcid.org/0000-0002-0456-2276"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ahmed Salem","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090308773","display_name":"Robert B. Sim","orcid":"https://orcid.org/0000-0002-2855-7455"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Robert Sim","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076686201","display_name":"Shruti Tople","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shruti Tople","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057803811","display_name":"Lukas Wutschitz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lukas Wutschitz","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088954009","display_name":"Santiago Zanella-B\u00e9guelin","orcid":"https://orcid.org/0000-0003-0479-9967"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Santiago Zanella-B\u00e9guelin","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5086633938"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":20.492,"has_fulltext":false,"cited_by_count":119,"citation_normalized_percentile":{"value":0.99588418,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"346","last_page":"363"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10927","display_name":"Access Control and Trust","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6554122567176819},{"id":"https://openalex.org/keywords/information-leakage","display_name":"Information leakage","score":0.5405498743057251},{"id":"https://openalex.org/keywords/leakage","display_name":"Leakage (economics)","score":0.5234332084655762},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42640674114227295},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34896576404571533},{"id":"https://openalex.org/keywords/internet-privacy","display_name":"Internet privacy","score":0.3215330243110657},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.3167656660079956}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6554122567176819},{"id":"https://openalex.org/C2779201187","wikidata":"https://www.wikidata.org/wiki/Q2775060","display_name":"Information leakage","level":2,"score":0.5405498743057251},{"id":"https://openalex.org/C2777042071","wikidata":"https://www.wikidata.org/wiki/Q6509304","display_name":"Leakage (economics)","level":2,"score":0.5234332084655762},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42640674114227295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34896576404571533},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.3215330243110657},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3167656660079956},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sp46215.2023.10179300","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sp46215.2023.10179300","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Symposium on Security and Privacy (SP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":79,"referenced_works":["https://openalex.org/W1557833142","https://openalex.org/W1873763122","https://openalex.org/W2020278455","https://openalex.org/W2083680779","https://openalex.org/W2088911157","https://openalex.org/W2296283641","https://openalex.org/W2473418344","https://openalex.org/W2535690855","https://openalex.org/W2747329762","https://openalex.org/W2752929869","https://openalex.org/W2947160092","https://openalex.org/W2947686949","https://openalex.org/W2962784628","https://openalex.org/W2962854673","https://openalex.org/W2963096510","https://openalex.org/W2963693643","https://openalex.org/W2965373594","https://openalex.org/W2966746916","https://openalex.org/W2981338466","https://openalex.org/W2988647680","https://openalex.org/W3027379683","https://openalex.org/W3035261884","https://openalex.org/W3096214574","https://openalex.org/W3130178918","https://openalex.org/W3153896080","https://openalex.org/W3165327186","https://openalex.org/W3176477796","https://openalex.org/W3213407400","https://openalex.org/W4221159672","https://openalex.org/W4225591000","https://openalex.org/W4229053728","https://openalex.org/W4281764334","https://openalex.org/W4283172211","https://openalex.org/W4283805899","https://openalex.org/W4285224048","https://openalex.org/W4286961857","https://openalex.org/W4287391717","https://openalex.org/W4287634898","https://openalex.org/W4287888099","https://openalex.org/W4288044264","https://openalex.org/W4288057780","https://openalex.org/W4288089799","https://openalex.org/W4292793781","https://openalex.org/W4297900086","https://openalex.org/W4307937235","https://openalex.org/W4309088836","https://openalex.org/W4310895557","https://openalex.org/W4320458011","https://openalex.org/W4385245566","https://openalex.org/W4385573569","https://openalex.org/W4385573947","https://openalex.org/W4399971973","https://openalex.org/W6602294163","https://openalex.org/W6606406289","https://openalex.org/W6638575559","https://openalex.org/W6757817989","https://openalex.org/W6763393573","https://openalex.org/W6763450642","https://openalex.org/W6766673545","https://openalex.org/W6769627184","https://openalex.org/W6778883912","https://openalex.org/W6787335730","https://openalex.org/W6788811087","https://openalex.org/W6790811189","https://openalex.org/W6794353620","https://openalex.org/W6801929890","https://openalex.org/W6802709103","https://openalex.org/W6809847026","https://openalex.org/W6810220367","https://openalex.org/W6810332117","https://openalex.org/W6810463509","https://openalex.org/W6810932996","https://openalex.org/W6838552513","https://openalex.org/W6838995493","https://openalex.org/W6839820251","https://openalex.org/W6840412424","https://openalex.org/W6841011359","https://openalex.org/W6843491178","https://openalex.org/W6846328044"],"related_works":["https://openalex.org/W2789919619","https://openalex.org/W2293457016","https://openalex.org/W3169305685","https://openalex.org/W2351428524","https://openalex.org/W1551406738","https://openalex.org/W2610387714","https://openalex.org/W2369308426","https://openalex.org/W1569841287","https://openalex.org/W1512718085","https://openalex.org/W2359001871"],"abstract_inverted_index":{"Language":[0],"Models":[1],"(LMs)":[2],"have":[3],"been":[4],"shown":[5],"to":[6,37,50,93,103,136,172,213],"leak":[7],"information":[8],"about":[9,195],"training":[10],"data":[11],"through":[12],"sentence-level":[13,183],"membership":[14,207],"inference":[15,208],"and":[16,71,79,129,149,159,200,209],"reconstruction":[17,130],"attacks.":[18],"Understanding":[19],"the":[20,38,61,74,81,84,87,142,187,218],"risk":[21,62,188],"of":[22,63,83,122,189,197],"LMs":[23],"leaking":[24],"Personally":[25],"Identifiable":[26],"Information":[27],"(PII)":[28],"has":[29],"received":[30],"less":[31],"attention,":[32],"which":[33,94],"can":[34,169],"be":[35],"attributed":[36],"false":[39],"assumption":[40],"that":[41,168,182],"dataset":[42],"curation":[43],"techniques":[44,55],"such":[45,98],"as":[46,99],"scrubbing":[47,68],"are":[48,164],"sufficient":[49],"prevent":[51,60,108],"PII":[52,64,109,123,175,190,198,210],"leakage.":[53],"Scrubbing":[54],"reduce":[56],"but":[57,192],"do":[58],"not":[59],"leakage:":[65],"in":[66,152,217],"practice":[67],"is":[69,91,220],"imperfect":[70],"must":[72],"balance":[73],"trade-off":[75],"between":[76,205],"minimizing":[77],"disclosure":[78,191],"preserving":[80],"utility":[82],"dataset.":[85],"On":[86],"other":[88],"hand,":[89],"it":[90],"unclear":[92],"extent":[95],"algorithmic":[96],"defenses":[97,151],"differential":[100,184],"privacy,":[101,107],"designed":[102],"guarantee":[104],"sentence-or":[105],"user-level":[106],"disclosure.":[110],"In":[111],"this":[112],"work,":[113],"we":[114],"introduce":[115],"rigorous":[116],"game-based":[117],"definitions":[118],"for":[119],"three":[120,153],"types":[121],"leakage":[124],"via":[125],"black-box":[126],"extraction,":[127],"inference,":[128],"attacks":[131,143,167],"with":[132,148],"only":[133],"API":[134],"access":[135],"an":[137],"LM.":[138],"We":[139],"empirically":[140],"evaluate":[141],"against":[144],"GPT-2":[145],"models":[146],"fine-tuned":[147],"without":[150],"domains:":[154],"case":[155],"law,":[156],"health":[157],"care,":[158],"e-mails.":[160],"Our":[161],"main":[162],"contributions":[163],"(i)":[165],"novel":[166],"extract":[170],"up":[171],"10\u00d7":[173],"more":[174],"sequences":[176],"than":[177],"existing":[178],"attacks,":[179],"(ii)":[180],"showing":[181],"privacy":[185],"reduces":[186],"still":[193],"leaks":[194],"3%":[196],"sequences,":[199],"(iii)":[201],"a":[202],"subtle":[203],"connection":[204],"record-level":[206],"reconstruction.":[211],"Code":[212],"reproduce":[214],"all":[215],"experiments":[216],"paper":[219],"available":[221],"at":[222],"https://github.com/microsoft/analysing_pii_leakage.":[223]},"counts_by_year":[{"year":2026,"cited_by_count":14},{"year":2025,"cited_by_count":67},{"year":2024,"cited_by_count":31},{"year":2023,"cited_by_count":7}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
